{
  "version": "1.0.3",
  "homePage": "",
  "Intrinsic_help_url": "https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics",
  "BuiltIn_url": "https://gcc.gnu.org/onlinedocs/",
  "Category": [
    {
      "cnName": "压缩库",
      "enName": "Compression library",
      "cnDesc": "使用鲲鹏硬件加速模块或鲲鹏指令对业界主流的开源压缩库（zlib、gzip、zstd、snappy等）进行性能优化，优化后压缩库通过鲲鹏社区发布",
      "enDesc": "The Kunpeng hardware acceleration module or Kunpeng instructions are used to optimize the performance of mainstream open-source compression libraries (such as zlib, gzip, zstd, and Snappy). The optimized compression libraries are released in the Kunpeng Community.",
      "id": "1"
    },
    {
      "cnName": "系统库",
      "enName": "System library",
      "cnDesc": "基于鲲鹏微架构特点，使用鲲鹏指令对系统通用的基础库进行性能优化，以及传统平台的指令函数映射到鲲鹏平台的公共模块",
      "enDesc": "Kunpeng instructions are used to optimize the performance of the common basic library of the system based on the Kunpeng micro-architecture features, and the instruction functions of the traditional platform are mapped to the common modules of the Kunpeng platform.",
      "id": "2"
    },
    {
      "cnName": "加解密库",
      "enName": "Encryption and decryption library",
      "cnDesc": "使用鲲鹏硬件加速模块及鲲鹏指令对openssl库进行性能优化，支持硬加速与指令加速的自动协同、应用逻辑无需修改即可使用加解密加速库",
      "enDesc": "The Kunpeng hardware acceleration module and Kunpeng instructions are used to optimize the performance of the OpenSSL library. Automatic collaboration between hardware acceleration and instruction acceleration is supported. The encryption and decryption acceleration library can be used without modifying the application logic.",
      "id": "3"
    },
    {
      "cnName": "媒体库",
      "enName": "Media library",
      "cnDesc": "基于鲲鹏加速指令提供高性能媒体原语库及视频编解码库",
      "enDesc": "High-performance media primitive library and video encoding/decoding library are provided based on Kunpeng acceleration instructions.",
      "id": "4"
    },
    {
      "cnName": "数学库",
      "enName": "Math library",
      "cnDesc": "鲲鹏数学库提供了基于鲲鹏平台优化的高性能数学函数，所有接口由C/C++、汇编语言实现。",
      "enDesc": "The Kunpeng Math Library (KML) provides high-performance mathematical functions optimized based on the Kunpeng platform. All interfaces are implemented in C/C++ and assembly language.",
      "id": "5"
    }
  ],
  "library": [
    {
      "name": "glibc_patch",
      "category id": "2",
      "cnDesc": "对内存、字符串、锁等接口基于华为鲲鹏920处理器微架构特点进行了加速优化",
      "enDesc": "The memory, string, and lock are optimized and accelerated based on the microarchitecture of Huawei Kunpeng 920 processors.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/system",
      "code_url": "http://ftp.jaist.ac.jp/pub/GNU/libc"
    },
    {
      "name": "hyperscan",
      "category id": "2",
      "cnDesc": "基于鲲鹏微架构优势，使用鲲鹏指令加速正则表达式的编译、扫描性能",
      "enDesc": "Kunpeng instructions are used to accelerate the compilation and scanning performance of regular expressions based on the advantages of the Kunpeng micro-architecture.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/system",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "AVX2Neon",
      "category id": "2",
      "cnDesc": "AVX2Neon是一款接口集合库。当使用Intrinsic类接口的应用程序从传统平台迁移到鲲鹏计算平台时，由于各个平台的Intrinsic函数定义不同，需要逐一对Intrinsic函数重新进行适配开发。针对该问题，我们提供了AVX2Neon模块，将传统平台的Intrinsic接口集合使用鲲鹏指令重新实现，并封装为独立的接口模块(C语言头文件方式)，以减少大量迁移项目重复开发的工作量。用户可以通过将头文件导入应用程序即可继续使用传统平台的Intrinsic函数。",
      "enDesc": "AVX2Neon is an interface collection library. When an application using the Intrinsic interface is ported from a conventional platform to the Kunpeng platform, Intrinsic functions need to be re-adapted and developed one by one due to the different Intrinsic function syntax on the platforms. To address this problem, the AVX2Neon module is provided. The Intrinsic interface collection on the conventional platform is re-implemented by using Kunpeng instructions and encapsulated as an independent interface module (in C language header file mode) to reduce the workload of repeated development of porting projects. Users can continue to use the Intrinsic functions of the conventional platform by importing the header file into the application.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/system",
      "code_url": "%该软件包部分源码开放：https://github.com/kunpengcompute/AvxToNeon，未开放部分如有需求请访问 https://www.hikunpeng.com/developer/boostkit/library/system，选择AVX2Neon，点击“立即申请”下载申请表并填写信息后发送至邮箱kunpengcompute@huawei.com%"
    },
    {
      "name": "gzip",
      "category id": "1",
      "cnDesc": "基于gzip-1.10，通过数据预取、循环展开、CRC指令替换等方法，来提升其在鲲鹏平台上的压缩和解压缩速率，尤其对文本类型文件的压缩及解压具有更明显的性能优势",
      "enDesc": "Data prefetch, loop unrolling, and CRC instruction replacement are used based on gzip-1.10 to improve the compression and decompression speed on the Kunpeng platform, especially the compression and decompression of text files.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/compression",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "zstd",
      "category id": "1",
      "cnDesc": "基于zstd-1.4.4，通过使用NEON指令、内联汇编、代码结构调整、内存预取、指令流水线排布优化等方法，实现zstd在鲲鹏平台上压缩和解压性能的提升",
      "enDesc": "The zstd compression and decompression performance on the Kunpeng platform is improved by using NEON instructions, inline assembly, and memory prefetch, adjusting code structure, and optimizing instruction pipeline layout based on zstd-1.4.4.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/compression",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "snappy",
      "category id": "1",
      "cnDesc": "基于snappy-1.1.7，使用内联汇编、高宽位指令、优化CPU流水线、内存预取等方法，实现snappy在鲲鹏平台上的压缩和解压速率提升",
      "enDesc": "The Snappy compression and decompression rates on the Kunpeng platform are improved by using inline assembly, high-bit instructions, optimized CPU pipeline, and memory prefetch based on Snappy 1.1.7.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/compression",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "KAEzip",
      "category id": "1",
      "cnDesc": "KAEzip是鲲鹏加速引擎的压缩模块，使用鲲鹏硬加速模块实现deflate算法，结合无损用户态驱动框架，提供高性能gzip/zlib格式压缩接口",
      "enDesc": "KAEzip is the compression module of the Kunpeng acceleration engine. It uses the Kunpeng hardware acceleration module to implement the deflate algorithm and works with the lossless user-mode driver framework to provide an interface for high-performance compression in gzip or zlib format.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/compression",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "KAECrypto",
      "category id": "3",
      "cnDesc": "使用鲲鹏硬加速模块实现RSA/SM3/SM4/DH/MD5/AES算法，结合无损用户态驱动框架，提供高性能对称加解密、非对称加解密算法能力，兼容openssl1.1.1a及其之后版本，支持同步&异步机制",
      "enDesc": "The Kunpeng hardware acceleration module implements the RSA, SM3, SM4, DH, MD5, and AES algorithms and provides high-performance symmetric and asymmetric encryption and decryption based on the lossless user-mode driver framework. It is compatible with OpenSSL 1.1.1a and later versions and supports synchronous and asynchronous mechanisms.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/encryption-decryption",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/GPC/"
    },
    {
      "name": "x265-patch",
      "category id": "4",
      "cnDesc": "针对ffmpeg视频转码场景，对x265的转码底层算子使用鲲鹏向量指令进行加速优化，提高转码性能",
      "enDesc": "The underlying x265 transcoding operators are accelerated and optimized by using the Kunpeng vector instruction to improve the transcoding performance in FFmpeg video transcoding scenarios.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/media",
      "code_url": "http://x265.org/blog/"
    },
    {
      "name": "HW265",
      "category id": "4",
      "cnDesc": "HW265视频编码器是符合H.265/HEVC视频编码标准、基于鲲鹏处理器NEON指令加速的华为自研H.265视频编码器。HW265支持四个预设编码档位可选，对应不同编码速度的应用场景，码率控制支持平均比特率模式（ABR）和恒定QP模式（CQP），功能涵盖直播、点播等各个场景，整体性能优于目前的主流开源软件。",
      "enDesc": "HW265 is a Huawei-developed H.265 video encoder that complies with the H.265/HEVC video encoding standard and uses the NEON instructions of Kunpeng processors for acceleration. HW265 supports four preset encoding levels that correspond to application scenarios with different encoding speeds. The bit rate control supports the average bit rate (ABR) and constant QP (CQP) modes. The functions cover various scenarios such as live TV and VOD. The overall performance of HW265 is better than that of mainstream open source software.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/media",
      "code_url": "%该软件包暂不支持公开下载，如有需求请访问 https://www.hikunpeng.com/developer/boostkit/library/media，选择HW265，点击“立即申请”下载申请表并填写相关信息后发送至邮箱kunpengcompute@huawei.com%"
    },
    {
      "name": "HMPP",
      "category id": "4",
      "cnDesc": "鲲鹏超媒体性能库HMPP(Hyper Media Performance Primitives)包括向量缓冲区的分配与释放、向量初始化、向量数学运算与统计学运算、向量采样与向量变换、滤波函数、变换函数(快速傅里叶变换)，支持IEEE 754浮点数运算标准，支持鲲鹏平台下使用。",
      "enDesc": "Kunpeng Hyper Media Performance Primitives (HMPP) provides functions for allocating and releasing vector buffers, vector initialization, vector mathematical operations, vector statistics operations, vector sampling and conversion, filtering functions, as well as transform (such as fast Fourier transform) functions. It complies with IEEE 754 (a technical standard for floating-point arithmetic) and can be used on the Kunpeng platform.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/media",
      "code_url": "%该软件包暂不支持公开下载，如有需求请访问：DEB包 https://support.huawei.com/enterprise/zh/software/252271741-ESW2000307263; RPM包 https://support.huawei.com/enterprise/zh/software/252271741-ESW2000307262，注册/登录华为账号并申请下载%"
    },
    {
      "name": "KML_FFT",
      "category id": "5",
      "cnDesc": "KML_FFT基于鲲鹏架构，通过向量化、算法改进，对快速离散傅里叶变换进行了深度优化，使得快速傅里叶变换接口函数的性能有大幅度提升。",
      "enDesc": "Based on the Kunpeng architecture, KML_FFT deeply optimizes the fast Fourier transform (FFT) by using vectorization and algorithm improvement, which greatly improves the performance of the FFT interface functions.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    },
    {
      "name": "KML_BLAS",
      "category id": "5",
      "cnDesc": "KML_BLAS基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，对BLAS的计算效率进行了深度挖掘，使得BLAS接口函数的性能逼近理论峰值。",
      "enDesc": "Based on the Kunpeng architecture, KML_BLAS performs in-depth mining on the computing efficiency of BLAS by means of vectorization, data prefetch, compilation optimization, and data rearrangement. As a result, the performance of BLAS interface functions approaches the theoretical peak.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    },
    {
      "name": "KML_SPBLAS",
      "category id": "5",
      "cnDesc": "KML_SPBLAS基于鲲鹏架构，充分利用鲲鹏的指令集和架构特点，开发了高性能稀疏矩阵运算库，提升HPC和大数据解决方案业务性能。",
      "enDesc": "Based on the instruction set and architecture features of Kunpeng, KML_SPBLAS develops a high-performance sparse matrix operation library to improve the service performance of HPC and big data solutions.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    },
    {
      "name": "KML_MATH",
      "category id": "5",
      "cnDesc": "KML_MATH通过周期函数规约、算法改进等手段，提供了基于鲲鹏处理器性能提升较大的函数实现。",
      "enDesc": "KML_MATH provides function implementations with greatly improved performance on Kunpeng processors by means of periodic function reduction and algorithm improvement.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    },
    {
      "name": "KML_VML",
      "category id": "5",
      "cnDesc": "KML_VML通过NEON指令优化、内联汇编等方法，对输入数据进行向量化处理，充分利用了鲲鹏架构下的寄存器特点，实现了在鲲鹏处理器上的性能提升。",
      "enDesc": "KML_VML performs vectorization on input data by using methods such as NEON instruction optimization and inline assembly. It uses the register features in the Kunpeng architecture to improve performance on Kunpeng processors.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    },
    {
      "name": "KML_LAPACK",
      "category id": "5",
      "cnDesc": "KML_LAPACK通过分块、求解算法组合、多线程、BLAS接口优化等手段，基于鲲鹏架构对LAPACK的计算效率进行了优化，实现了在鲲鹏处理器上的性能提升。",
      "enDesc": "KML_LAPACK optimizes the LAPACK based on the Kunpeng architecture by means of block division, algorithm combination, multithreading, and Basic Linear Algebra Subprograms (BLAS) interface optimization, improving performance on Kunpeng processors.",
      "kunpeng_url": "https://www.hikunpeng.com/developer/boostkit/library/math",
      "code_url": "https://mirrors.huaweicloud.com/kunpeng/archive/Kunpeng_SDK/HPC/"
    }
  ],
  "function": [
    {
      "name": "memcpy",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "内存复制函数",
      "desc_en": "Memory copy function",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "memset",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "内存初始化",
      "desc_en": "Memory initialization",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "memcmp",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "内存比较",
      "desc_en": "Memory comparison",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "memrchr",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "内存逆向查找",
      "desc_en": "Reverse memory search",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "strcpy",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "字符串拷贝",
      "desc_en": "String copy",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "strlen",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "计算字符串长度",
      "desc_en": "Calculates the string length",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "strnlen",
      "library": "glibc_patch",
      "headerfile": "",
      "desc_cn": "计算指定内存区的字符串长度",
      "desc_en": "Calculates the length of a string in a specified memory area",
      "benefit_cn": "使用了Neon指令进行加速优化，已合入gnu社区",
      "benefit_en": "The NEON instructions are used for acceleration and have been incorporated into the GNU community."
    },
    {
      "name": "deflate",
      "library": "gzip",
      "headerfile": "",
      "desc_cn": "gzip工具的压缩接口",
      "desc_en": "Compression interface of the gzip tool",
      "benefit_cn": "使用了指令预取、循环展开实现性能优化",
      "benefit_en": "Instruction prefetch and loop unrolling are used to optimize the performance.",
      "func_name": "int deflate(z_streamp strm, int flush)",
      "headerfile_desc": "gzip.h",
      "parameters": [
        "strm: pointer to zlib compressed stream",
        "flush: sourceLen ? Z_NO_FLUSH : Z_FINISH"
      ],
      "return": []
    },
    {
      "name": "inflate",
      "library": "gzip",
      "headerfile": "",
      "desc_cn": "gzip工具的解压接口",
      "desc_en": "Decompression interface of the gzip tool",
      "benefit_cn": "使用了CRC加速指令实现性能优化",
      "benefit_en": "The CRC acceleration instruction is used to optimize the performance.",
      "func_name": "int inflate(z_streamp strm, int flush)",
      "headerfile_desc": "gzip.h",
      "parameters": [
        "strm: pointer to zlib compressed stream",
        "flush: "
      ],
      "return": []
    },
    {
      "name": "hs_scan",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_runtime.h",
      "desc_cn": "块模式的正则表达式扫描",
      "desc_en": "Regular expression scanning in block mode",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, unsigned int length, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context)",
      "headerfile_desc": "hs_runtime.h",
      "parameters": [
        "const hs_database_t *db: A compiled pattern database.",
        "const char *data: Pointer to the data to be scanned.",
        "unsigned int length: The number of bytes to scan.",
        "unsigned int flags: Flags modifying the behaviour of this function. This parameter is provided for future use and is unused at present.",
        "hs_scratch_t *scratch: A per-thread scratch space allocated by @ref hs_alloc_scratch() for this database.",
        "match_event_handler onEvent: Pointer to a match event callback function. If a NULL pointer is given, no matches will be returned.",
        "void *context: The user defined pointer which will be passed to the callback function."
      ],
      "return": "Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match callback indicated that scanning should stop; other values on error."
    },
    {
      "name": "hs_scan_vector",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_runtime.h",
      "desc_cn": "向量正则表达式扫描",
      "desc_en": "Vector regular expression scanning",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, const char *const *data, const unsigned int *length, unsigned int count, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *context)",
      "headerfile_desc": "hs_runtime.h",
      "parameters": [
        "const hs_database_t *db:  A compiled pattern database.",
        "const char *const *data: An array of pointers to the data blocks to be scanned.",
        "const unsigned int *length: An array of lengths (in bytes) of each data block to scan.",
        "unsigned int count: Number of data blocks to scan. This should correspond to the size of the @p data and @p length arrays.",
        "unsigned int flags:  Flags modifying the behaviour of this function. This parameter is provided for future use and is unused at present.",
        "hs_scratch_t *scratch: A per-thread scratch space allocated by @ref hs_alloc_scratch() for this database.",
        "match_event_handler onEvent: Pointer to a match event callback function. If a NULL pointer is given, no matches will be returned.",
        "void *context: The user defined pointer which will be passed to the callback function."
      ],
      "return": "Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match callback indicated that scanning should stop; other values on error."
    },
    {
      "name": "hs_scan_stream",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_runtime.h",
      "desc_cn": "将待扫描数据写入打开的流中",
      "desc_en": "Write data to be scanned to the opened stream",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, unsigned int length, unsigned int flags, hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt)",
      "headerfile_desc": "hs_runtime.h",
      "parameters": [
        "hs_stream_t *id: The stream ID (returned by @ref hs_open_stream()) to which the data will be written.",
        "const char *data: Pointer to the data to be scanned.",
        "unsigned int length: The number of bytes to scan.",
        "unsigned int flags: Flags modifying the behaviour of the stream. This parameter is provided for future use and is unused at present.",
        "hs_scratch_t *scratch: A per-thread scratch space allocated by @ref hs_alloc_scratch().",
        "match_event_handler onEvent: Pointer to a match event callback function. If a NULL pointer is given, no matches will be returned.",
        "void *ctxt: The user defined pointer which will be passed to the callback function when a match occurs."
      ],
      "return": "Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match callback indicated that scanning should stop; other values on error."
    },
    {
      "name": "hs_compile",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_compile.h",
      "desc_cn": "导入单个正则表达式",
      "desc_en": "Import a regular expression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags, unsigned int mode, const hs_platform_info_t *platform, hs_database_t **db, hs_compile_error_t **error)",
      "headerfile_desc": "hs_compile.h",
      "parameters": [
        "const char *expression: The NULL-terminated expression to parse. Note that this string must represent ONLY the pattern to be matched, with no delimiters or flags; any global flags should be specified with the @p flags argument. For example, the expression `/abc?def/i` should be compiled by providing `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a flags.",
        "unsigned int flags: Flags which modify the behaviour of the expression.",
        "unsigned int mode: Compiler mode flags that affect the database as a whole. One of @ref HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be supplied, to select between the generation of a streaming, block or vectored database. In addition, other flags (beginning with HS_MODE_) may be supplied to enable specific features. See @ref HS_MODE_FLAG for more details.",
        "const hs_platform_info_t *platform: If not NULL, the platform structure is used to determine the target platform for the database. If NULL, a database suitable for running on the current host platform is produced.",
        "hs_database_t **db: On success, a pointer to the generated database will be returned in this parameter, or NULL on failure. The caller is responsible for deallocating the buffer using the @ref hs_free_database() function.",
        "hs_compile_error_t **error: If the compile fails, a pointer to a @ref hs_compile_error_t will be returned, providing details of the error condition. The caller is responsible for deallocating the buffer using the @ref hs_free_compile_error() function."
      ],
      "return": "@ref HS_SUCCESS is returned on successful compilation; @ref HS_COMPILER_ERROR on failure, with details provided in the @p error parameter."
    },
    {
      "name": "hs_compile_multi",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_compile.h",
      "desc_cn": "导入多个正则表达式",
      "desc_en": "Import multiple regular expressions",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, const unsigned int *flags, const unsigned int *ids, unsigned int elements, unsigned int mode, const hs_platform_info_t *platform, hs_database_t **db, hs_compile_error_t **error)",
      "headerfile_desc": "hs_compile.h",
      "parameters": [
        "const char *const *expressions: Array of NULL-terminated expressions to compile. Note that (as for @ref hs_compile()) these strings must contain only the pattern to be matched, with no delimiters or flags. For example, the expression `/abc?def/i` should be compiled by providing `abc?def` as the first string in the @p expressions array, and @ref HS_FLAG_CASELESS as the first value in the @p flags array.",
        "const unsigned int *flags: Array of flags which modify the behaviour of each expression.",
        "const unsigned int *ids: An array of integers specifying the ID number to be associated with the corresponding pattern in the expressions array. Specifying the NULL pointer in place of an array will set the ID value for all patterns to zero.",
        "unsigned int elements: The number of elements in the input arrays.",
        "unsigned int mode: Compiler mode flags that affect the database as a whole. One of @ref HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be supplied, to select between the generation of a streaming, block or vectored database. In addition, other flags (beginning with HS_MODE_) may be supplied to enable specific features. See @ref HS_MODE_FLAG for more details.",
        "const hs_platform_info_t *platform: If not NULL, the platform structure is used to determine the target platform for the database. If NULL, a database suitable for running on the current host platform is produced.",
        "hs_database_t **db: On success, a pointer to the generated database will be returned in this parameter, or NULL on failure. The caller is responsible for deallocating the buffer using the @ref hs_free_database() function.",
        "hs_compile_error_t **error: If the compile fails, a pointer to a @ref hs_compile_error_t will be returned, providing details of the error condition. The caller is responsible for deallocating the buffer using the @ref hs_free_compile_error() function."
      ],
      "return": "@ref HS_SUCCESS is returned on successful compilation; @ref HS_COMPILER_ERROR on failure, with details provided in the @p error parameter."
    },
    {
      "name": "hs_compile_ext_multi",
      "library": "hyperscan",
      "headerfile": "https://gitee.com/kunpengcompute/hyperscan/raw/aarch64/src/hs_compile.h",
      "desc_cn": "导入多个含扩展参数的正则表达式",
      "desc_en": "Import multiple regular expressions containing extended parameters",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions, const unsigned int *flags, const unsigned int *ids, const hs_expr_ext_t *const *ext, unsigned int elements, unsigned int mode, const hs_platform_info_t *platform, hs_database_t **db, hs_compile_error_t **error)",
      "headerfile_desc": "hs_compile.h",
      "parameters": [
        "const char *const *expressions: Array of NULL-terminated expressions to compile. Note that (as for @ref hs_compile()) these strings must contain only the pattern to be matched, with no delimiters or flags. For example, the expression `/abc?def/i` should be compiled by providing `abc?def` as the first string in the @p expressions array, and @ref HS_FLAG_CASELESS as the first value in the @p flags array.",
        "const unsigned int *flags: Array of flags which modify the behaviour of each expression.",
        "const unsigned int *ids: An array of integers specifying the ID number to be associated with the corresponding pattern in the expressions array. Specifying the NULL pointer in place of an array will set the ID value for all patterns to zero.",
        "const hs_expr_ext_t *const *ext: An array of pointers to filled @ref hs_expr_ext_t structures that define extended behaviour for each pattern. NULL may be specified if no extended behaviour is needed for an individual pattern, or in place of the whole array if it is not needed for any expressions. Memory used by these structures must be both allocated and freed by the caller.",
        "unsigned int elements: The number of elements in the input arrays.",
        "unsigned int mode: Compiler mode flags that affect the database as a whole. One of @ref HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be supplied, to select between the generation of a streaming, block or vectored database. In addition, other flags (beginning with HS_MODE_) may be supplied to enable specific features. See @ref HS_MODE_FLAG for more details.",
        "const hs_platform_info_t *platform: If not NULL, the platform structure is used to determine the target platform for the database. If NULL, a database suitable for running on the current host platform is produced.",
        "hs_database_t **db: On success, a pointer to the generated database will be returned in this parameter, or NULL on failure. The caller is responsible for deallocating the buffer using the @ref hs_free_database() function.",
        "hs_compile_error_t **error: If the compile fails, a pointer to a @ref hs_compile_error_t will be returned, providing details of the error condition. The caller is responsible for deallocating the buffer using the @ref hs_free_compile_error() function."
      ],
      "return": "@ref HS_SUCCESS is returned on successful compilation; @ref HS_COMPILER_ERROR on failure, with details provided in the @p error parameter."
    },
    {
      "name": "ZSTD_compress",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "zstd块压缩",
      "desc_en": "zstd block compression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "int compressionLevel"
      ],
      "return": "compressed size written into `dst` (<= `dstCapacity`), or an error code if it fails (which can be tested using ZSTD_isError())."
    },
    {
      "name": "ZSTD_decompress",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "zstd块解压",
      "desc_en": "zstd block decompression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "void* dst",
        "size_t dstCapacity: an upper bound of originalSize to regenerate.",
        "const void* src",
        "size_t compressedSize: must be the _exact_ size of some number of compressed and/or skippable frames."
      ],
      "return": "the number of bytes decompressed into `dst` (<= `dstCapacity`), or an errorCode if it fails (which can be tested using ZSTD_isError())"
    },
    {
      "name": "ZSTD_compressCCtx",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "参数可复用的zstd块压缩接口",
      "desc_en": "zstd block compression interface with reusable parameters",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "int compressionLevel"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_decompressDCtx",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "参数可复用的zstd流解压接口",
      "desc_en": "zstd stream decompression interface with reusable parameters",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DCtx* dctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compress2",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "参数复用的zstd流解压扩展接口",
      "desc_en": "zstd stream decompression extension interface with reusable parameters",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compressStream",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "zstd流压缩",
      "desc_en": "zstd stream compression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CStream* zcs",
        "ZSTD_outBuffer* output",
        "ZSTD_inBuffer* input"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compressStream2",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "zstd流压缩扩展接口",
      "desc_en": "zstd stream compression extension interface",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input, ZSTD_EndDirective endOp)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "ZSTD_outBuffer* output",
        "ZSTD_inBuffer* input",
        "ZSTD_EndDirective endOp"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_decompressStream",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "zstd流解压",
      "desc_en": "zstd stream decompression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DStream* zds",
        "ZSTD_outBuffer* output",
        "ZSTD_inBuffer* input"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compress_usingDict",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "基于字典的zstd块压缩",
      "desc_en": "Dictionary-based zstd block compression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, int compressionLevel)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* ctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "const void* dict",
        "size_t dictSize",
        "int compressionLevel"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_decompress_usingDict",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "基于字典的zstd块解压",
      "desc_en": "Dictionary-based zstd block decompression",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DCtx* dctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "const void* dict",
        "size_t dictSize"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compress_usingCDict",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "基于摘要字典的zstd块压缩",
      "desc_en": "zstd block compression based on the digest dictionary",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_CDict* cdict)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "const ZSTD_CDict* cdict"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_decompress_usingDDict",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "基于摘要字典的zstd块解压",
      "desc_en": "zstd block decompression based on the digest dictionary",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const ZSTD_DDict* ddict)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DCtx* dctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize",
        "const ZSTD_DDict* ddict"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compressBegin",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "低内存消耗的流压缩初始化接口",
      "desc_en": "Stream compression initialization interface with low memory consumption",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "int compressionLevel"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compressContinue",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "低内存消耗的流压缩接口",
      "desc_en": "Stream compression interface with low memory consumption",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize"
      ],
      "return": ""
    },
    {
      "name": "ZSTD_compressEnd",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "低内存消耗的流压缩终止接口",
      "desc_en": "Stream compression termination interface with low memory consumption",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_CCtx* cctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize"
      ],
      "return": "Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum."
    },
    {
      "name": "ZSTD_decompressBegin",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "低内存消耗的流解压初始化接口",
      "desc_en": "Stream decompression initialization interface with low memory consumption",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DCtx* dctx"
      ],
      "return": "When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero). A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0"
    },
    {
      "name": "ZSTD_decompressContinue",
      "library": "zstd",
      "headerfile": "https://gitee.com/kunpengcompute/zstd/raw/aarch64-1.4.4/lib/zstd.h",
      "desc_cn": "低内存消耗的流解压接口",
      "desc_en": "Stream decompression interface with low memory consumption",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)",
      "headerfile_desc": "zstd.h",
      "parameters": [
        "ZSTD_DCtx* dctx",
        "void* dst",
        "size_t dstCapacity",
        "const void* src",
        "size_t srcSize"
      ],
      "return": "returns 0 : it only skips the content."
    },
    {
      "name": "snappy_compress",
      "library": "snappy",
      "headerfile": "https://gitee.com/kunpengcompute/snappy/raw/aarch64-1.1.7/snappy-c.h",
      "desc_cn": "块模式压缩接口",
      "desc_en": "Block-mode compression interface",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "snappy_status snappy_compress(const char* input, size_t input_length, char* compressed, size_t* compressed_length)",
      "headerfile_desc": "snappy-c.h",
      "parameters": [
        "const char* input",
        "size_t input_length",
        "char* compressed",
        "size_t* compressed_length"
      ],
      "return": "Returns failure (a value not equal to SNAPPY_OK) if the message is corrupted and could not be decrypted."
    },
    {
      "name": "snappy_uncompress",
      "library": "snappy",
      "headerfile": "https://gitee.com/kunpengcompute/snappy/raw/aarch64-1.1.7/snappy-c.h",
      "desc_cn": "块模式解压接口",
      "desc_en": "Block-mode decompression interface",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions",
      "func_name": "snappy_status snappy_uncompress(const char* compressed, size_t compressed_length, char* uncompressed, size_t* uncompressed_length)",
      "headerfile_desc": "snappy-c.h",
      "parameters": [
        "const char* compressed",
        "size_t compressed_length",
        "char* uncompressed",
        "size_t* uncompressed_length"
      ],
      "return": "Returns the maximal size of the compressed representation of input data that is 'source_length' bytes in length."
    },
    {
      "name": "deflate",
      "library": "KAEzip",
      "headerfile": "https://gitee.com/kunpengcompute/KAEzip/raw/master/include/kaezip.h",
      "desc_cn": "zlib流压缩接口",
      "desc_en": "zlib stream compression interface",
      "benefit_cn": "基于鲲鹏加速器进行性能优化",
      "benefit_en": "Performance optimized based on the Kunpeng accelerator",
      "func_name": "int deflate(z_streamp strm, int flush)",
      "headerfile_desc": "kaezip.h",
      "parameters": [
        "strm: pointer to zlib compressed stream",
        "flush: sourceLen ? Z_NO_FLUSH : Z_FINISH"
      ],
      "return": []
    },
    {
      "name": "inflate",
      "library": "KAEzip",
      "headerfile": "https://gitee.com/kunpengcompute/KAEzip/raw/master/include/kaezip.h",
      "desc_cn": "zlib流解压接口",
      "desc_en": "zlib stream decompression interface",
      "benefit_cn": "基于鲲鹏加速器进行性能优化",
      "benefit_en": "Performance optimized based on the Kunpeng accelerator",
      "func_name": "int inflate(z_streamp strm, int flush)",
      "headerfile_desc": "kaezip.h",
      "parameters": [
        "strm: pointer to zlib compressed stream",
        "flush: "
      ],
      "return": []
    },
    {
      "name": "compress",
      "library": "KAEzip",
      "headerfile": "https://gitee.com/kunpengcompute/KAEzip/raw/master/include/kaezip.h",
      "desc_cn": "zlib块压缩接口",
      "desc_en": "zlib block compression interface",
      "benefit_cn": "基于鲲鹏加速器进行性能优化",
      "benefit_en": "Performance optimized based on the Kunpeng accelerator",
      "func_name": "int compress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)",
      "headerfile_desc": "kaezip.h",
      "parameters": [
        "dest: the destination buffer",
        "destLen: destLen is the total size of the destination buffer, which must be at least 0.1% larger than sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.",
        "source: the source buffer",
        "sourceLen: sourceLen is the byte length of the source buffer."
      ],
      "return": [
        "compress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer."
      ]
    },
    {
      "name": "compress2",
      "library": "KAEzip",
      "headerfile": "https://gitee.com/kunpengcompute/KAEzip/raw/master/include/kaezip.h",
      "desc_cn": "zlib块压缩扩展接口",
      "desc_en": "zlib block compression extension interface",
      "benefit_cn": "基于鲲鹏加速器进行性能优化",
      "benefit_en": "Performance optimized based on the Kunpeng accelerator",
      "func_name": "int compress2(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level)",
      "headerfile_desc": "kaezip.h",
      "parameters": [
        "dest: the destination buffer",
        "destLen: destLen is the total size of the destination buffer, which must be at least 0.1% larger than sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.",
        "source: the source buffer",
        "sourceLen: sourceLen is the byte length of the source buffer.",
        "level: The level parameter has the same meaning as in deflateInit. "
      ],
      "return": [
        "compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer, Z_STREAM_ERROR if the level parameter is invalid."
      ]
    },
    {
      "name": "uncompress",
      "library": "KAEzip",
      "headerfile": "https://gitee.com/kunpengcompute/KAEzip/raw/master/include/kaezip.h",
      "desc_cn": "zlib块解压接口",
      "desc_en": "zlib block decompression interface",
      "benefit_cn": "基于鲲鹏加速器进行性能优化",
      "benefit_en": "Performance optimized based on the Kunpeng accelerator",
      "func_name": "int uncompress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)",
      "headerfile_desc": "kaezip.h",
      "parameters": [
        "dest: the destination buffer",
        "destLen: destLen is the total size of the destination buffer, which must be large enough to hold the entire uncompressed data.",
        "source: the source buffer",
        "sourceLen: sourceLen is the byte length of the source buffer"
      ],
      "return": [
        "uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer, or Z_DATA_ERROR if the input data was corrupted, including if the input data is an incomplete zlib stream."
      ]
    },
    {
      "name": "EVP_EncryptInit_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl加密初始化接口",
      "desc_en": "OpenSSL encryption initialization interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, const unsigned char *key,const unsigned char *iv)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "cipher: EVP_CIPHER cipher",
        "impl: ENGINE impl",
        "key: the symmetric key to use",
        "iv: the IV to use"
      ],
      "return": [
        "EVP_EncryptInit_ex() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_EncryptUpdate",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl加密计算接口",
      "desc_en": "OpenSSL encryption calculation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out,int *outl, const unsigned char *in, int inl)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "out: writes the encrypted version to out",
        "outl: the actual number of bytes written",
        "in: the buffer",
        "inl: inl bytes"
      ],
      "return": [
        "EVP_EncryptUpdate() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_EncryptFinal",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl加密完成接口",
      "desc_en": "OpenSSL encryption completion interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_EncryptFinal(EVP_CIPHER_CTX *ctx, unsigned char *out,int *outl)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "out: ",
        "outl: "
      ],
      "return": []
    },
    {
      "name": "EVP_DecryptInit_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl解密初始化接口",
      "desc_en": "OpenSSL decryption initialization interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, const unsigned char *key,const unsigned char *iv)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "cipher: EVP_CIPHER cipher",
        "impl: a ENGINE impl",
        "key: the symmetric key",
        "iv: the IV to use"
      ],
      "return": [
        "EVP_DecryptInit_ex() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_DecryptUpdate",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl解密计算接口",
      "desc_en": "OpenSSL decryption calculation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out,int *outl, const unsigned char *in, int inl)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "out: ",
        "outl: ",
        "in: ",
        "inl: "
      ],
      "return": [
        "EVP_DecryptUpdate() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_DecryptFinal_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl解密完成接口",
      "desc_en": "OpenSSL decryption completion interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *outm, int *outl)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "outm: ",
        "outl: "
      ],
      "return": [
        "EVP_DecryptFinal_ex() returns 0 if the decrypt failed or 1 for success."
      ]
    },
    {
      "name": "EVP_DigestInit_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl摘要计算初始化接口",
      "desc_en": "OpenSSL digest calculation initialization interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type,ENGINE *impl)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_CIPHER_CTX context",
        "type: a digest type",
        "impl: a ENGINE impl"
      ],
      "return": [
        "EVP_DigestInit_ex() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_DigestUpdate",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl摘要计算接口",
      "desc_en": "OpenSSL digest calculation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DigestUpdate(EVP_MD_CTX *ctx, const void *d,size_t cnt)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_MD_CTX context",
        "d: data",
        "cnt: cnt bytes of data"
      ],
      "return": [
        "EVP_DigestUpdate returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "EVP_DigestFinal_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl摘要完成接口",
      "desc_en": "OpenSSL digest completion interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *md,unsigned int *s)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_MD_CTX context.",
        "md: places the digest value in md.",
        "s: If the s parameter is not NULL then the number of bytes of data written (i.e. the length of the digest) will be written to the integer at s, at most EVP_MAX_MD_SIZE bytes will be written."
      ],
      "return": [
        "EVP_DigestFinal_ex() returns 1 for success and 0 for failure."
      ]
    },
    {
      "name": "DH_generate_key",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl DH私钥生成接口",
      "desc_en": "OpenSSL DH private key generation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int DH_generate_key(DH *dh)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "dh: contain the shared parameters dh->p and dh->g. It generates a random private DH value."
      ],
      "return": [
        "DH_generate_key() returns 1 on success, 0 otherwise."
      ]
    },
    {
      "name": "DH_compute_key",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl DH计算共享秘钥接口",
      "desc_en": "OpenSSL DH shared key calculation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "key: key must point to DH_size(dh) bytes of memory.",
        "pub_key: the other party's public value.",
        "dh: the shared secret from the private DH value."
      ],
      "return": [
        "DH_compute_key() returns the size of the shared secret on success, -1 on error."
      ]
    },
    {
      "name": "RSA_generate_key_ex",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl RSA秘钥对生成接口",
      "desc_en": "OpenSSL RSA key pairs generation interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "rsa: a RSA structure key pair provided in rsa.",
        "bits: the length of the modulus size.",
        "e_value: a public exponent will be e. Key sizes with num < 1024 should be considered insecure.",
        "cb: A callback function may be used to provide feedback about the progress of the key generation."
      ],
      "return": "If key generation fails, RSA_generate_key() returns NULL."
    },
    {
      "name": "EVP_PKEY_encrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl 公钥加密接口",
      "desc_en": "OpenSSL public key encryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_PKEY_encrypt(EVP_PKEY_CTX *ctx,unsigned char *out, size_t *outlen,const unsigned char *in, size_t inlen)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_PKEY_CTX context",
        "out/outlen: If out is NULL then the maximum size of the output buffer is written to the outlen parameter. If out is not NULL then before the call the outlen parameter should contain the length of the out buffer, if the call is successful the encrypted data is written to out and the amount of data written to outlen.",
        "in/inlen: The data to be encrypted is specified using the in and inlen parameters. "
      ],
      "return": [
        "EVP_PKEY_encrypt() return 1 for success and 0 or a negative value for failure.",
        "In particular a return value of -2 indicates the operation is not supported by the public key algorithm."
      ]
    },
    {
      "name": "EVP_PKEY_decrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl 私钥解密接口",
      "desc_en": "OpenSSL private key decryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_PKEY_decrypt(EVP_PKEY_CTX *ctx,unsigned char *out, size_t *outlen,const unsigned char *in, size_t inlen)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_PKEY_CTX context",
        "out/outlen:  If out is NULL then the maximum size of the output buffer is written to the outlen parameter. If out is not NULL then before the call the outlen parameter should contain the length of the out buffer, if the call is successful the decrypted data is written to out and the amount of data written to outlen.",
        "in/inlen: The data to be decrypted is specified using the in and inlen parameters."
      ],
      "return": [
        "EVP_PKEY_decrypt() return 1 for success and 0 or a negative value for failure.",
        "In particular a return value of -2 indicates the operation is not supported by the public key algorithm."
      ]
    },
    {
      "name": "EVP_PKEY_sign",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl 私钥签名接口",
      "desc_en": "OpenSSL private key signature interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_PKEY_sign(EVP_PKEY_CTX *ctx,unsigned char *sig, size_t *siglen,const unsigned char *tbs, size_t tbslen)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_PKEY_CTX context",
        "sig/siglen: If sig is NULL then the maximum size of the output buffer is written to the siglen parameter. If sig is not NULL then before the call the siglen parameter should contain the length of the sig buffer, if the call is successful the signature is written to sig and the amount of data written to siglen.",
        "tbs/tbslen: The data to be signed is specified using the tbs and tbslen parameters."
      ],
      "return": [
        "EVP_PKEY_sign() return 1 for success and 0 or a negative value for failure.",
        "In particular a return value of -2 indicates the operation is not supported by the public key algorithm."
      ]
    },
    {
      "name": "EVP_PKEY_verify",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl 公钥验签接口",
      "desc_en": "OpenSSL public key signature verification interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int EVP_PKEY_verify(EVP_PKEY_CTX *ctx,const unsigned char *sig, size_t siglen,const unsigned char *tbs, size_t tbslen)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "ctx: EVP_PKEY_CTX context",
        "sig/siglen: The signature is specified using the sig and siglen parameters",
        "tbs/tbslen: The verified data (i.e. the data believed originally signed) is specified using the tbs and tbslen parameters"
      ],
      "return": [
        "EVP_PKEY_verify() return 1 if the verification was successful and 0 if it failed.",
        "A negative value indicates an error other that signature verification failure.",
        "In particular a return value of -2 indicates the operation is not supported by the public key algorithm."
      ]
    },
    {
      "name": "RSA_private_decrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl RSA私钥解密接口",
      "desc_en": "OpenSSL RSA private key decryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int RSA_private_decrypt(int flen, const unsigned char *from, unsigned char *to, RSA *rsa, int padding)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "flen: Length of the ciphertext to be decrypted",
        "from: the ciphertext to be decrypted",
        "to: a memory section large enough to hold the decrypted data (which is smaller than RSA_size(rsa))",
        "rsa: the private key rsa",
        "padding: the padding mode that was used to encrypt the data"
      ],
      "return": [
        "RSA_private_decrypt() returns the size of the recovered plaintext.",
        "On error, -1 is returned; the error codes can be obtained by ERR_get_error(3)."
      ]
    },
    {
      "name": "RSA_private_encrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl RSA私钥加密接口",
      "desc_en": "OpenSSL RSA private key encryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int RSA_private_encrypt(int flen, const unsigned char *from, unsigned char *to, RSA *rsa, int padding)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "flen: the flen bytes at from",
        "from: usually a message digest with an algorithm identifier",
        "to: point to RSA_size(rsa) bytes of memory and stores the signature",
        "rsa: the private key rsa",
        "padding: padding modes, including RSA_PKCS1_PADDING, RSA_NO_PADDING"
      ],
      "return": [
        "RSA_private_encrypt() returns the size of the signature (i.e., RSA_size(rsa)).",
        "On error, -1 is returned; the error codes can be obtained by ERR_get_error(3)."
      ]
    },
    {
      "name": "RSA_public_decrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl RSA公钥解密接口",
      "desc_en": "OpenSSL RSA public key decryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int RSA_public_decrypt(int flen, const unsigned char *from, unsigned char *to,RSA *rsa, int padding)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "flen: the flen bytes long signature",
        "from: the message digest from the flen bytes long signature",
        "to: point to a memory section large enough to hold the message digest (which is smaller than RSA_size(rsa) - 11)",
        "rsa: using the signer's public key rsa",
        "padding: the padding mode that was used to sign the data"
      ],
      "return": [
        "RSA_public_decrypt() returns the size of the recovered message digest.",
        "On error, -1 is returned; the error codes can be obtained by ERR_get_error(3)."
      ]
    },
    {
      "name": "RSA_public_encrypt",
      "library": "KAECrypto",
      "headerfile": "https://gitee.com/mirrors/openssl/raw/dda4e259e51aeaf05a2417ef577accf778c9f6f6/include/openssl/evp.h",
      "desc_cn": "openssl RSA公钥加密接口",
      "desc_en": "OpenSSL RSA public key encryption interface",
      "benefit_cn": "基于鲲鹏加速器实现RSA/AES/SM3/SM4/MD5/DH性能加速",
      "benefit_en": "RSA/AES/SM3/SM4/MD5/DH performance accelerated based on the Kunpeng accelerator",
      "func_name": "int RSA_public_encrypt(int flen, const unsigned char *from, unsigned char *to,RSA *rsa, int padding)",
      "headerfile_desc": "evp.h",
      "parameters": [
        "flen: Length of the plaintext to be encrypted",
        "from: the plaintext to be encrypted",
        "to: point to RSA_size(rsa) bytes of memory and stores the ciphertext",
        "rsa: the public key rsa",
        "padding: padding modes, including RSA_PKCS1_PADDING, RSA_PKCS1_OAEP_PADDING, RSA_SSLV23_PADDING, RSA_NO_PADDING"
      ],
      "return": [
        "RSA_public_encrypt() returns the size of the encrypted data (i.e., RSA_size(rsa)).",
        "On error, -1 is returned; the error codes can be obtained by ERR_get_error(3)."
      ]
    },
    {
      "name": "vsadd",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型加法函数",
      "desc_en": "Single-precision floating-point real type adds vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsadd(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sum of two numbers is returned for each operation value. The value range is [-INF, +INF].",
        "If the values of src1 and src2 are –0, the value of dst is –0. If the values of src1 and src2 are ±0, the value of dst is +0.",
        "If the values of src1 and src2 are -∞, the value of dst is -∞. If the values of src1 and src2 are +∞, the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is -∞, then the value of dst is Not a Number (NaN). If the value of src1 is -∞ and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdadd",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型加法函数",
      "desc_en": "Double-precision floating-point real type adds vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdadd(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sum of two numbers is returned for each operation value. The value range is [-INF, +INF].",
        "If the values of src1 and src2 are –0, the value of dst is –0. If the values of src1 and src2 are ±0, the value of dst is +0.",
        "If the values of src1 and src2 are -∞, the value of dst is -∞. If the values of src1 and src2 are +∞, the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is -∞, then the value of dst is Not a Number (NaN). If the value of src1 is -∞ and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vssub",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型减法函数",
      "desc_en": "Single-precision floating-point real type subtracts vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssub(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 (minuend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 (subtrahend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The difference of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is –0 and the value of src2 is +0, then the value of dst is –0. If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is +0.",
        "If the value of src1 is -∞ and the value of src2 is -∞, then the value of dst is NaN. If the value of src1 is +∞ and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 is +∞ and the value of src2 is -∞, then the value of dst is +∞. If the value of src1 is -∞ and the value of src2 is +∞, then the value of dst is -∞.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdsub",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型减法函数",
      "desc_en": "Double-precision floating-point real type subtracts vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsub(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 (minuend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 (subtrahend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The difference of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is –0 and the value of src2 is +0, then the value of dst is –0. If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is +0.",
        "If the value of src1 is -∞ and the value of src2 is -∞, then the value of dst is NaN. If the value of src1 is +∞ and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 is +∞ and the value of src2 is -∞, then the value of dst is +∞. If the value of src1 is -∞ and the value of src2 is +∞, then the value of dst is -∞.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vssqr",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型平方函数",
      "desc_en": "Single-precision floating-point real type squares a vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssqr(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The square of a number is returned for each operation value. The value range is [0, +INF).",
        "If the value of src is ±0, the value of dst is +0.",
        "If the value of src is ±∞, the value of dst is +∞.",
        "If the value of src is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdsqr",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型平方函数",
      "desc_en": "Double-precision floating-point real type squares a vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsqr(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The square of a number is returned for each operation value. The value range is [0, +INF).",
        "If the value of src is ±0, the value of dst is +0.",
        "If the value of src is ±∞, the value of dst is +∞.",
        "If the value of src is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vsmul",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型乘法函数",
      "desc_en": "Single-precision floating-point real type multiplies vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsmul(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The product of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is ±0.",
        "If the value of src1 is ±∞ and the value of src2 is ±∞, then the value of dst is ±∞.",
        "If the value of src1 is ±∞ and the value of src2 is ±0, then the value of dst is NaN. If the value of src1 is ±0 and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdmul",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型乘法函数",
      "desc_en": "Double-precision floating-point real type multiplies vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdmul(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The product of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is ±0.",
        "If the value of src1 is ±∞ and the value of src2 is ±∞, then the value of dst is ±∞.",
        "If the value of src1 is ±∞ and the value of src2 is ±0, then the value of dst is NaN. If the value of src1 is ±0 and the value of src2 is +∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vsdiv",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型除法函数",
      "desc_en": "Computes the quotient of single-precision floating-point real type vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsdiv(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 (dividend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 (divisor) with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The quotient of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is NaN.",
        "If the value of src1 is greater than +0 and the value of src2 is ±∞, then the value of dst is ±0. If the value of src1 is greater than +0 and the value of src2 is ±0, then the value of dst is ±∞.",
        "If the value of src1 is less than +0 and the value of src2 is +0, then the value of dst is -∞. If the value of src1 is less than +0 and the value of src2 is -0, then the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is +∞, then the value of dst is NaN. If the value of src1 is less than -∞ and the value of src2 is -∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vddiv",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型除法函数",
      "desc_en": "Computes the quotient of double-precision floating-point real type vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vddiv(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 (dividend) with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 (divisor) with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The quotient of two numbers is returned for each operation value. The value range is (-INF, +INF).",
        "If the value of src1 is ±0 and the value of src2 is ±0, then the value of dst is NaN.",
        "If the value of src1 is greater than +0 and the value of src2 is ±∞, then the value of dst is ±0. If the value of src1 is greater than +0 and the value of src2 is ±0, then the value of dst is ±∞.",
        "If the value of src1 is less than +0 and the value of src2 is +0, then the value of dst is -∞. If the value of src1 is less than +0 and the value of src2 is -0, then the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is +∞, then the value of dst is NaN. If the value of src1 is less than -∞ and the value of src2 is -∞, then the value of dst is NaN.",
        "If the value of src1 or src2 is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vssqrt",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型开方函数",
      "desc_en": "Computes the square root of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssqrt(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The square root of a number is returned for each operation value. The value range is [0, +INF).",
        "If the value of src is ±0, the value of dst is ±0.",
        "If the value of src is less than +0, the value of dst is NaN.",
        "If the value of src is +∞, the value of dst is +∞.",
        "If the value of src is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdsqrt",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型开方函数",
      "desc_en": "Computes the square root of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsqrt(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The square root of a number is returned for each operation value. The value range is [0, +INF).",
        "If the value of src is ±0, the value of dst is ±0.",
        "If the value of src is less than +0, the value of dst is NaN.",
        "If the value of src is +∞, the value of dst is +∞.",
        "If the value of src is NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vspow",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型幂函数",
      "desc_en": "Raises a single-precision floating-point real type vector to the specified power",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vspow(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "If the value of src1 is greater than 0 and the value of src2 is a decimal, the value range of dst is ∈ (-INF, +INF).",
        "If the value of src1 is -0 and the value of src2 is a negative odd number, then the value of dst is -∞. If the value of src1 is ±0 and the value of src2 is a negative number, then the value of dst is +∞.",
        "If the value of src1 is ±0 and the value of src2 is a positive odd number, then the value of dst is ±0. If the value of src1 is ±0 and the value of src2 is a positive number, then the value of dst is +0.",
        "If the value of src1 is ±1 and the value of src2 is any value, the value of dst is +1.",
        "If the value of src1 is any value and the value of src2 is ±0, the value of dst is +1.",
        "If the value of src1 is less than +0 and the value of src2 is a decimal, the value of dst is NaN.",
        "If the value of |src1| is less than 1 and the value of src2 is -∞, then the value of dst is +∞. If the value of |src1| is greater than 1 and the value of src2 is -∞, then the value of dst is +0.",
        "If the value of |src1| is less than 1 and the value of src2 is +∞, then the value of dst is +0. If the value of |src1| is greater than 1 and the value of src2 is +∞, then the value of dst is +∞.",
        "If the value of src1 is -∞ and the value of src2 is a negative odd number, then the value of dst is -0. If the value of src1 is -∞ and the value of src2 is a negative number, then the value of dst is +0.",
        "If the value of src1 is -∞ and the value of src2 is a positive odd number, then the value of dst is -∞. If the value of src1 is -∞ and the value of src2 is a positive number, then the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is less than +0, the value of dst is +0. If the value of src1 is +∞, the value of dst is +∞.",
        "If the values of src1 and src2 are both large finite numbers, the value of dst is ±∞.",
        "If the values of src1 and src2 are both NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vdpow",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型幂函数",
      "desc_en": "Raises a double-precision floating-point real type vector to the specified power",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdpow(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "If the value of src1 is greater than 0 and the value of src2 is a decimal, the value range of dst is ∈ (-INF, +INF).",
        "If the value of src1 is -0 and the value of src2 is a negative odd number, then the value of dst is -∞. If the value of src1 is ±0 and the value of src2 is a negative number, then the value of dst is +∞.",
        "If the value of src1 is ±0 and the value of src2 is a positive odd number, then the value of dst is ±0. If the value of src1 is ±0 and the value of src2 is a positive number, then the value of dst is +0.",
        "If the value of src1 is ±1 and the value of src2 is any value, the value of dst is +1.",
        "If the value of src1 is any value and the value of src2 is ±0, the value of dst is +1.",
        "If the value of src1 is less than +0 and the value of src2 is a decimal, the value of dst is NaN.",
        "If the value of |src1| is less than 1 and the value of src2 is -∞, then the value of dst is +∞. If the value of |src1| is greater than 1 and the value of src2 is -∞, then the value of dst is +0.",
        "If the value of |src1| is less than 1 and the value of src2 is +∞, then the value of dst is +0. If the value of |src1| is greater than 1 and the value of src2 is +∞, then the value of dst is +∞.",
        "If the value of src1 is -∞ and the value of src2 is a negative odd number, then the value of dst is -0. In every other case where the value of src1 is -∞ and the value of src2 is a negative number, the value of dst is +0.",
        "If the value of src1 is -∞ and the value of src2 is a positive odd number, then the value of dst is -∞. In every other case where the value of src1 is -∞ and the value of src2 is a positive number, the value of dst is +∞.",
        "If the value of src1 is +∞ and the value of src2 is less than +0, the value of dst is +0. If the value of src1 is +∞ and the value of src2 is greater than +0, the value of dst is +∞.",
        "If the values of src1 and src2 are both large finite numbers, the value of dst is ±∞.",
        "If the values of src1 and src2 are both NaN, the value of dst is NaN."
      ]
    },
    {
      "name": "vsexp",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型以自然对数为底的指数函数",
      "desc_en": "Computes the base-e exponential of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsexp(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The base-e exponential y of x is returned for each operation value. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input of expf is greater than 0x1.62e42ep6f (about 88), the return value is +∞. If the input of exp is greater than 0x1.62p9 (about 708), the return value is +∞.",
        "If the input of expf is less than -0x1.9fe368p6f (about -104), the return value is +0. If the input of exp is less than -0x1.62p9 (about -708), the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdexp",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型以自然对数为底的指数函数",
      "desc_en": "Computes the base-e exponential of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdexp(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The base-e exponential y of x is returned for each operation value. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input of expf is greater than 0x1.62e42ep6f (about 88), the return value is +∞. If the input of exp is greater than 0x1.62p9 (about 708), the return value is +∞.",
        "If the input of expf is less than -0x1.9fe368p6f (about -104), the return value is +0. If the input of exp is less than -0x1.62p9 (about -708), the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vsln",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型以自然对数为底的对数函数",
      "desc_en": "Computes the base-e logarithm of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsln(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The logarithm y of x with e as the base is returned for each operation value. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdln",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型以自然对数为底的对数函数",
      "desc_en": "Computes the base-e logarithm of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdln(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The logarithm y of x with e as the base is returned for each operation value. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vslog10",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型以10为底的对数函数",
      "desc_en": "Computes the base-10 logarithm of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vslog10(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The logarithm y of x with 10 as the base is returned for each operation value. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdlog10",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型以10为底的对数函数",
      "desc_en": "Computes the base-10 logarithm of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdlog10(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The logarithm y of x with 10 as the base is returned for each operation value. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vscos",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vscos(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The cosine y of radian angle x is returned for each operation value. y ∈ [-1, +1]",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdcos",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdcos(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The cosine y of radian angle x is returned for each operation value. y ∈ [-1, +1]",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vssin",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦函数",
      "desc_en": "Computes the sine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssin(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sine y of radian angle x is returned for each operation value. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdsin",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦函数",
      "desc_en": "Computes the sine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsin(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sine y of radian angle x is returned for each operation value. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vstan",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正切函数",
      "desc_en": "Computes the tangent of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vstan(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the tangent function value y of the radian angle x. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN.",
        "This function has a mathematical pole at π(1/2 + n). Because no general floating-point numbers can accurately represent the value at the pole, the return value for the pole cannot be considered reliable."
      ]
    },
    {
      "name": "vdtan",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正切函数",
      "desc_en": "Computes the tangent of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdtan(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the tangent function value y of the radian angle x. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN.",
        "This function has a mathematical pole at π(1/2 + n). Because no general floating-point numbers can accurately represent the value at the pole, the return value for the pole cannot be considered reliable."
      ]
    },
    {
      "name": "vsatan",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsatan(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the unique angle whose tangent value is x on (-π/2, π/2). The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdatan",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdatan(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the unique angle whose tangent value is x on (-π/2, π/2). The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vsatan2",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型两个变量四个象限下的反正切函数",
      "desc_en": "Computes the four-quadrant arc tangent of two single-precision floating-point real type vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsatan2(const int len, const float* src1, const float* src2, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "An azimuth from the origin to the point (x, y) is returned for each operation value atan2(y, x), that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is -∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is a finite positive number, the return value is +π.",
        "If x is -∞ and y is a finite negative number, the return value is -π.",
        "If x is +∞ and y is a finite positive number, the return value is +0.",
        "If x is +∞ and y is a finite negative number, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdatan2",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型两个变量四个象限下的反正切函数",
      "desc_en": "Computes the four-quadrant arc tangent of two double-precision floating-point real type vectors",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdatan2(const int len, const double* src1, const double* src2, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src1: Input vector src1 with length len. If the pointer is null, the system prompts a null pointer error.",
        "src2: Input vector src2 with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "An azimuth from the origin to the point (x, y) is returned for each operation value atan2(y, x), that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is -∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is a finite positive number, the return value is +π.",
        "If x is -∞ and y is a finite negative number, the return value is -π.",
        "If x is +∞ and y is a finite positive number, the return value is +0.",
        "If x is +∞ and y is a finite negative number, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vssincos",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦函数与余弦函数",
      "desc_en": "Computes the sine and cosine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssincos(const int len, const float* src, float* sindst, float* cosdst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "sindst: Output vector sindst with length len. If the pointer is null, the system prompts a null pointer error.",
        "cosdst: Output vector cosdst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x are returned for each operation value. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "vdsincos",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦函数与余弦函数",
      "desc_en": "Computes the sine and cosine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsincos(const int len, const double* src, double* sindst, double* cosdst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "sindst: Output vector sindst with length len. If the pointer is null, the system prompts a null pointer error.",
        "cosdst: Output vector cosdst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x are returned for each operation value. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "vssinh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vssinh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The hyperbolic sine y of the radian angle x is returned for each operation value.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input value of sinhf is greater than 0x1.8p6f, +∞ is returned. If the input value of sinh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of sinhf is less than -0x1.8p6f, -∞ is returned. If the input value of sinh is less than -0x1.8p9, -∞ is returned.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdsinh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdsinh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The hyperbolic sine y of the radian angle x is returned for each operation value.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input value of sinhf is greater than 0x1.8p6f, +∞ is returned. If the input value of sinh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of sinhf is less than -0x1.8p6f, –∞ is returned. If the input value of sinh is less than -0x1.8p9, –∞ is returned.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vscosh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vscosh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The hyperbolic cosine y of radian angle x is returned for each operation value. y ∈ [1, +∞].",
        "If the input is ±0, the return value is +1.",
        "If the input value of coshf is greater than 0x1.8p6f, +∞ is returned. If the input value of cosh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of coshf is less than -0x1.8p6f, +∞ is returned. If the input value of cosh is less than -0x1.8p9, +∞ is returned.",
        "If the input is ±∞, the return value is +∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdcosh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdcosh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The hyperbolic cosine y of radian angle x is returned for each operation value. y ∈ [1, +∞].",
        "If the input is ±0, the return value is +1.",
        "If the input value of coshf is greater than 0x1.8p6f, +∞ is returned. If the input value of cosh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of coshf is less than -0x1.8p6f, +∞ is returned. If the input value of cosh is less than -0x1.8p9, +∞ is returned.",
        "If the input is ±∞, the return value is +∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vstanh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vstanh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the hyperbolic tangent function value y of the radian angle x. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +1.",
        "If the input is -∞, the return value is -1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdtanh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdtanh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the hyperbolic tangent function value y of the radian angle x. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +1.",
        "If the input is -∞, the return value is -1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vsasinh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsasinh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The inverse hyperbolic sine y of the radian angle x is returned for each operation value.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdasinh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdasinh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The inverse hyperbolic sine y of the radian angle x is returned for each operation value.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vsacosh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反余弦函数",
      "desc_en": "Computes hyperbolic arc cosine of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsacosh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the hyperbolic arc cosine function value y of the radian angle x. x ∈ [1, +∞).",
        "If the input is +1, the return value is +0.",
        "If the input x is less than +1, NaN is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdacosh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反余弦函数",
      "desc_en": "Computes the hyperbolic arc cosine of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdacosh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "Each operation value returns the hyperbolic arc cosine function value y of the radian angle x. x ∈ [1, +∞).",
        "If the input is +1, the return value is +0.",
        "If the input x is less than +1, NaN is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vsatanh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of a single-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vsatanh(const int len, const float* src, float* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The inverse hyperbolic tangent y of the radiant angle x is returned for each operation value. x ∈ (-1, +1).",
        "If the input is +1, the return value is +∞.",
        "If you enter -1, -∞ is returned.",
        "If the input |x| is greater than 1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "vdatanh",
      "library": "KML_VML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of a double-precision floating-point real type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void vdatanh(const int len, const double* src, double* dst)",
      "headerfile_desc": "kvml.h",
      "parameters": [
        "len: Number of elements in the input vector. If len ≤ 0, the system displays a message indicating that the value of len is invalid.",
        "src: Input vector src with length len. If the pointer is null, the system prompts a null pointer error.",
        "dst: Output vector dst with length len. If the pointer is null, the system prompts a null pointer error."
      ],
      "return": [
        "The inverse hyperbolic tangent y of the radiant angle x is returned for each operation value. x ∈ (-1, +1).",
        "If the input is +1, the return value is +∞.",
        "If you enter -1, -∞ is returned.",
        "If the input |x| is greater than 1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "sin",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦函数",
      "desc_en": "Computes the sine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double sin(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "sinf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦函数",
      "desc_en": "Computes the sine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float sinf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "csin",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型正弦函数",
      "desc_en": "Computes the sine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double complex csin(double complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "csinf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型正弦函数",
      "desc_en": "Computes the sine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float complex csinf(float complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double cos(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float cosf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ccos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型余弦函数",
      "desc_en": "Computes the cosine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double complex ccos(double complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ccosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型余弦函数",
      "desc_en": "Computes the cosine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float complex ccosf(float complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "Floating-point value of a radian angle"
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "sincos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "void sincos(double x, double* sinx, double* cosx)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle",
        "sinx: Output pointer sinx. If the pointer is null, an undefined behavior occurs, and the interface does not verify the output pointer.",
        "cosx: Output pointer cosx. If the pointer is null, an undefined behavior occurs, and the interface does not verify the output pointer."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x are returned. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "sincosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "void sincosf(float x, float* sinx, float* cosx)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle",
        "sinx: Output pointer sinx. If the pointer is null, an undefined behavior occurs, and the interface does not verify the output pointer.",
        "cosx: Output pointer cosx. If the pointer is null, an undefined behavior occurs, and the interface does not verify the output pointer."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x are returned. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "csincos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "csincosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "tan",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正切函数",
      "desc_en": "Computes the tangent of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double tan(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The tangent y of the radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "tanf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正切函数",
      "desc_en": "Computes the tangent of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float tanf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The tangent y of the radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ctan",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型正切函数",
      "desc_en": "Computes the tangent of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double complex ctan(double complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The tangent y of the radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ctanf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型正切函数",
      "desc_en": "Computes the tangent of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float complex ctanf(float complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The tangent y of the radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "asin",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正弦函数",
      "desc_en": "Computes the arc sine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double asin(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the arc sine function value y of the radian angle x. x ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input x is ±1, the return value is ±π/2.",
        "If the input |x| is greater than +1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "asinf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正弦函数",
      "desc_en": "Computes the arc sine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float asinf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the arc sine function value y of the radian angle x. x ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input x is ±1, the return value is ±π/2.",
        "If the input |x| is greater than +1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "casin",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型反正弦函数",
      "desc_en": "Computes the arc sine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "casinf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型反正弦函数",
      "desc_en": "Computes the arc sine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "acos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反余弦函数",
      "desc_en": "Computes the arc cosine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double acos(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the arc cosine function value y (a radian angle) of the input x. x ∈ [-1, +1].",
        "If the input is ±0, the return value is +π/2.",
        "If the input is +1, the return value is +0.",
        "If the input is -1, the return value is +π.",
        "If the input |x| is greater than +1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "acosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反余弦函数",
      "desc_en": "Computes the arc cosine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float acosf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the arc cosine function value y (a radian angle) of the input x. x ∈ [-1, +1].",
        "If the input is ±0, the return value is +π/2.",
        "If the input is +1, the return value is +0.",
        "If the input is -1, the return value is +π.",
        "If the input |x| is greater than +1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cacos",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型反余弦函数",
      "desc_en": "Computes the arc cosine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cacosf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型反余弦函数",
      "desc_en": "Computes the arc cosine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "atan",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double atan(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The unique angle whose tangent value is x at (-π/2, π/2) is returned. The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "atanf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float atanf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The unique angle whose tangent value is x at (-π/2, π/2) is returned. The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "catan",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型反正切函数",
      "desc_en": "Computes the arc tangent of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double complex catan(double complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The unique angle whose tangent value is x at (-π/2, π/2) is returned. The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "catanf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型反正切函数",
      "desc_en": "Computes the arc tangent of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float complex catanf(float complex x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The unique angle whose tangent value is x at (-π/2, π/2) is returned. The value range is (-π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "atan2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of y/x for double-precision floating-point real types y and x, using the signs of both arguments to determine the quadrant",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double atan2(double y, double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "y: Floating-point value of the input data",
        "x: Floating-point value of the input data"
      ],
      "return": [
        "An azimuth from the origin to the point (x, y) is returned for atan2(y, x), that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is -∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is a finite positive number, the return value is +π.",
        "If x is -∞ and y is a finite negative number, the return value is -π.",
        "If x is +∞ and y is a finite positive number, the return value is +0.",
        "If x is +∞ and y is a finite negative number, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "atan2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of y/x for single-precision floating-point real types y and x, using the signs of both arguments to determine the quadrant",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float atan2f(float y, float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "y: Floating-point value of the input data",
        "x: Floating-point value of the input data"
      ],
      "return": [
        "An azimuth from the origin to the point (x, y) is returned for atan2(y, x), that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is -∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is a finite positive number, the return value is +π.",
        "If x is -∞ and y is a finite negative number, the return value is -π.",
        "If x is +∞ and y is a finite positive number, the return value is +0.",
        "If x is +∞ and y is a finite negative number, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "catan2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型反正切函数",
      "desc_en": "Computes the arc tangent of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "catan2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型反正切函数",
      "desc_en": "Computes the arc tangent of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "sinh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double sinh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic sine function value y of the radian angle x.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input value of sinhf is greater than 0x1.8p6f, +∞ is returned. If the input value of sinh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of sinhf is less than -0x1.8p6f, -∞ is returned. If the input value of sinh is less than -0x1.8p9, -∞ is returned.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "sinhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float sinhf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic sine function value y of the radian angle x.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input value of sinhf is greater than 0x1.8p6f, +∞ is returned. If the input value of sinh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of sinhf is less than -0x1.8p6f, -∞ is returned. If the input value of sinh is less than -0x1.8p9, -∞ is returned.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "csinh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "csinhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cosh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double cosh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic cosine function value y of the radian angle x. y ∈ [1, +∞).",
        "If the input is ±0, the return value is +1.",
        "If the input value of coshf is greater than 0x1.8p6f, +∞ is returned. If the input value of cosh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of coshf is less than -0x1.8p6f, +∞ is returned. If the input value of cosh is less than -0x1.8p9, +∞ is returned.",
        "If the input is ±∞, the return value is +∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "coshf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float coshf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic cosine function value y of the radian angle x. y ∈ [1, +∞).",
        "If the input is ±0, the return value is +1.",
        "If the input value of coshf is greater than 0x1.8p6f, +∞ is returned. If the input value of cosh is greater than 0x1.8p9, +∞ is returned.",
        "If the input value of coshf is less than -0x1.8p6f, +∞ is returned. If the input value of cosh is less than -0x1.8p9, +∞ is returned.",
        "If the input is ±∞, the return value is +∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ccosh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "ccoshf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "tanh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double tanh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the hyperbolic tangent function value y of the radian angle x. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +1.",
        "If the input is -∞, the return value is -1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "tanhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float tanhf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the hyperbolic tangent function value y of the radian angle x. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +1.",
        "If the input is -∞, the return value is -1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ctanh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "ctanhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "asinh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double asinh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The inverse hyperbolic sine y of the radian angle x is returned.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "asinhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float asinhf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "The inverse hyperbolic sine y of the radian angle x is returned.",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±∞.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "casinh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "casinhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲反正弦函数",
      "desc_en": "Computes the hyperbolic arc sine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "acosh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反余弦函数",
      "desc_en": "Computes the hyperbolic arc cosine of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double acosh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic arc cosine function value y of the radian angle x. x ∈ [1, +∞).",
        "If the input is +1, the return value is +0.",
        "If the input x is less than +1, NaN is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "acoshf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反余弦函数",
      "desc_en": "Computes the hyperbolic arc cosine of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float acoshf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of a radian angle"
      ],
      "return": [
        "Return the hyperbolic arc cosine function value y of the radian angle x. x ∈ [1, +∞).",
        "If the input is +1, the return value is +0.",
        "If the input x is less than +1, NaN is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cacosh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲反余弦函数",
      "desc_en": "Computes the hyperbolic arc cosine of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cacoshf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲反余弦函数",
      "desc_en": "Computes the hyperbolic arc cosine of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "atanh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double atanh(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the hyperbolic arc tangent function value y of the radian angle x. x ∈ (-1, +1).",
        "If the input is +1, the return value is +∞.",
        "If the input is -1, the return value is -∞.",
        "If the input |x| is greater than 1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "atanhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float atanhf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "Return the hyperbolic arc tangent function value y of the radian angle x. x ∈ (-1, +1).",
        "If the input is +1, the return value is +∞.",
        "If the input is -1, the return value is -∞.",
        "If the input |x| is greater than 1, the return value is NaN.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "catanh",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "catanhf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型双曲反正切函数",
      "desc_en": "Computes the hyperbolic arc tangent of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "exp",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double exp(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +∞).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input of expf is greater than 0x1.62e42ep6f (about 88), the return value is +∞. If the input of exp is greater than 0x1.62p9 (about 708), the return value is +∞.",
        "If the input of expf is less than -0x1.9fe368p6f (about -104), the return value is +0. If the input value of exp is less than -0x1.62p9 (about -708), the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "expf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float expf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input of expf is greater than 0x1.62e42ep6f (about 88), the return value is +∞. If the input of exp is greater than 0x1.62p9 (about 708), the return value is +∞.",
        "If the input of expf is less than -0x1.9fe368p6f (about -104), the return value is +0. If the input value of exp is less than -0x1.62p9 (about -708), the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cexp",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cexpf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "exp2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double exp2(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-2 exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1",
        "If input of expf is greater than 128, the return value is +∞. If the input of exp is greater than 1024, the return value is +∞.",
        "If the input of expf is less than -150, the return value is +0. If the input of exp is less than -1075, the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "exp2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float exp2f(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-2 exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1",
        "If input of expf is greater than 128, the return value is +∞. If the input of exp is greater than 1024, the return value is +∞.",
        "If the input of expf is less than -150, the return value is +0. If the input of exp is less than -1075, the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cexp2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cexp2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "log",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double log(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-e logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "logf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float logf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-e logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "clog",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "clogf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "log2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base 2)",
      "desc_en": "Computes the base-2 logarithm of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double log2(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-2 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "log2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base 2)",
      "desc_en": "Computes the base-2 logarithm of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float log2f(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-2 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "clog2",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型对数函数(base 2)",
      "desc_en": "Computes the base-2 logarithm of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "clog2f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型对数函数(base 2)",
      "desc_en": "Computes the base-2 logarithm of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "log10",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double log10(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-10 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "log10f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float log10f(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "The base-10 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "clog10",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "clog10f",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "pow",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型幂函数",
      "desc_en": "Raises double-precision floating-point real type x to the specified power",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double pow(double x, double y)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data",
        "y: Floating-point value of the input data"
      ],
      "return": [
        "If x is greater than 0 and y is a decimal, the result of x raised to the power of y is returned. The value range is r ∈ (-INF, +INF).",
        "If x is -0 and y is a negative odd number, the return value is -∞. If x is ±0 and y is a negative number, the return value is +∞.",
        "If x is ±0 and y is a positive odd number, the return value is ±0. If x is ±0, y is a positive number, the return value is +0.",
        "If x is ±1 and y is any value, the return value is +1.",
        "If x is any value and y is ±0, the return value is +1.",
        "If x is less than +0 and y is a decimal, the return value is NaN.",
        "If |x| is less than 1 and y is -∞, the return value is +∞. If |x| is greater than 1 and y is -∞, the return value is +0.",
        "If |x| is less than 1 and y is +∞, the return value is +0. If |x| is greater than 1 and y is +∞, the return value is +∞.",
        "If x is -∞ and y is a negative odd number, the return value is -0. If x is -∞ and y is a negative number, the return value is +0.",
        "If x is -∞ and y is a positive odd number, the return value is -∞. If x is -∞ and y is a positive number, the return value is +∞.",
        "If x is +∞ and y is less than +0, the return value is +0. If x is +∞ and y is greater than or equal to +0, the return value is +∞.",
        "If x and y are both large finite numbers, the return value is ±∞.",
        "If both x and y are NaN, the return value is NaN."
      ]
    },
    {
      "name": "powf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型幂函数",
      "desc_en": "Raises single-precision floating-point real type x to the specified power",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float powf(float x, float y)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data",
        "y: Floating-point value of the input data"
      ],
      "return": [
        "If x is greater than 0 and y is a decimal, the result of x raised to the power of y is returned. The value range is r ∈ (-INF, +INF).",
        "If x is -0 and y is a negative odd number, the return value is -∞. If x is ±0 and y is a negative number, the return value is +∞.",
        "If x is ±0 and y is a positive odd number, the return value is ±0. If x is ±0, y is a positive number, the return value is +0.",
        "If x is ±1 and y is any value, the return value is +1.",
        "If x is any value and y is ±0, the return value is +1.",
        "If x is less than +0 and y is a decimal, the return value is NaN.",
        "If |x| is less than 1 and y is -∞, the return value is +∞. If |x| is greater than 1 and y is -∞, the return value is +0.",
        "If |x| is less than 1 and y is +∞, the return value is +0. If |x| is greater than 1 and y is +∞, the return value is +∞.",
        "If x is -∞ and y is a negative odd number, the return value is -0. If x is -∞ and y is a negative number, the return value is +0.",
        "If x is -∞ and y is a positive odd number, the return value is -∞. If x is -∞ and y is a positive number, the return value is +∞.",
        "If x is +∞ and y is less than +0, the return value is +0. If x is +∞ and y is greater than or equal to +0, the return value is +∞.",
        "If x and y are both large finite numbers, the return value is ±∞.",
        "If both x and y are NaN, the return value is NaN."
      ]
    },
    {
      "name": "cpow",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型幂函数",
      "desc_en": "Raises double-precision floating-point complex type x to the specified power",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cpowf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型幂函数",
      "desc_en": "Raises single-precision floating-point complex type x to the specified power",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "sqrt",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型平方根函数",
      "desc_en": "Computes the square root of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float sqrt(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "If the input x is ±0, the return value is ±0.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "sqrtf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型平方根函数",
      "desc_en": "Computes the square root of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double sqrtf(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "If the input x is ±0, the return value is ±0.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "csqrt",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型平方根函数",
      "desc_en": "Computes the square root of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "csqrtf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型平方根函数",
      "desc_en": "Computes the square root of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "cbrt",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度浮点类型立方根函数",
      "desc_en": "Computes the cubic root of double-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "double cbrt(double x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "If the input x is ±0, the return value is ±0.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "cbrtf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度浮点类型立方根函数",
      "desc_en": "Computes the cubic root of single-precision floating-point real type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement",
      "func_name": "float cbrtf(float x)",
      "headerfile_desc": "km.h",
      "parameters": [
        "x: Floating-point value of the input data"
      ],
      "return": [
        "If the input x is ±0, the return value is ±0.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "ccbrt",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "双精度复数类型立方根函数",
      "desc_en": "Computes the cubic root of double-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "ccbrtf",
      "library": "KML_MATH",
      "headerfile": "",
      "desc_cn": "单精度复数类型立方根函数",
      "desc_en": "Computes the cubic root of single-precision floating-point complex type x",
      "benefit_cn": "通过周期函数规约、算法改进等手段，提供了基于鲲鹏芯片性能提升较大的函数实现",
      "benefit_en": "Provides functions with high performance that is based on Kunpeng processors by means of Periodic function range reduction and algorithm improvement"
    },
    {
      "name": "svml128_sin_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦函数",
      "desc_en": "Computes the sine of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_sin_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sin_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1]",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_sin_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦函数",
      "desc_en": "Computes the sine of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_sin_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sin_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The sine y of radiant angle x is returned. y ∈ [-1, +1]",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_cos_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_cos_f32(float32x4_t src);",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_cos_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_cos_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型余弦函数",
      "desc_en": "Computes the cosine of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_cos_f64(float64x2_t src);",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_cos_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The cosine y of radiant angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_sincos_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void svml128_sincos_f32(float32x4_t src, float32x4_t *sindst, float32x4_t *cosdst)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sincos_f32, src is of single-precision floating-point type.",
        "sindst: Floating-point value of the output vector. If the pointer is null, the system prompts a null pointer error. For svml128_sincos_f32, sindst is of single-precision floating-point type.",
        "cosdst: Floating-point value of the output vector. If the pointer is null, the system prompts a null pointer error. For svml128_sincos_f32, cosdst is of single-precision floating-point type."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x is returned for each operation value. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "svml128_sincos_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正弦、余弦函数",
      "desc_en": "Computes the sine and cosine of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void svml128_sincos_f64(float64x2_t src, float64x2_t *sindst ,float64x2_t *cosdst)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sincos_f64, src is of double-precision floating-point type.",
        "sindst: Floating-point value of the output vector. If the pointer is null, the system prompts a null pointer error. For svml128_sincos_f64, sindst is of double-precision floating-point type.",
        "cosdst: Floating-point value of the output vector. If the pointer is null, the system prompts a null pointer error. For svml128_sincos_f64, cosdst is of double-precision floating-point type."
      ],
      "return": [
        "The sine sinx and cosine cosx of the radian angle x is returned for each operation value. sinx ∈ [-1, +1]; cosx ∈ [-1, +1].",
        "If the input is +0, the sine is +0 and the cosine is +1.",
        "If the input is -0, the sine is -0 and the cosine is +1.",
        "If the input is ±∞, the sine and cosine are both NaN.",
        "If the input is NaN, the sine and cosine are both NaN."
      ]
    },
    {
      "name": "svml128_tan_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型正切函数",
      "desc_en": "Computes the tangent of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_tan_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_tan_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The tangent y of radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN.",
        "This function has a mathematical pole at π(1/2 + n). Because no general floating-point numbers can accurately represent the value at the pole, the return value for the pole cannot be considered reliable."
      ]
    },
    {
      "name": "svml128_tan_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型正切函数",
      "desc_en": "Computes the tangent of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_tan_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_tan_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The tangent y of radiant angle x is returned. y ∈ (-INF, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is NaN.",
        "If the input is NaN, the return value is NaN.",
        "This function has a mathematical pole at π(1/2 + n). Because no general floating-point numbers can accurately represent the value at the pole, the return value for the pole cannot be considered reliable."
      ]
    },
    {
      "name": "svml128_atan_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_atan_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_atan_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The unique angle whose tangent value is x at (–π/2, π/2) is returned. The value range is (–π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_atan_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_atan_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_atan_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The unique angle whose tangent value is x at (–π/2, π/2) is returned. The value range is (–π/2, +π/2).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is +∞, the return value is +π/2.",
        "If the input is -∞, the return value is -π/2.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_atan2_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_atan2_f32(float32x4_t src1, float32x4_t src2)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src1: Floating-point value of the input vector. For svml128_atan2_f32, src1 is of single-precision floating-point type.",
        "src2: Floating-point value of the input vector. For svml128_atan2_f32, src2 is of single-precision floating-point type."
      ],
      "return": [
        "For atan2(y, x): An azimuth from the origin to the point (x, y) is returned, that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is –∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is finite positive, the return value is +π.",
        "If x is -∞ and y is finite negative, the return value is -π.",
        "If x is +∞ and y is finite positive, the return value is +0.",
        "If x is +∞ and y is finite negative, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_atan2_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型反正切函数",
      "desc_en": "Computes the arc tangent of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_atan2_f64(float64x2_t src1, float64x2_t src2);",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src1: Floating-point value of the input vector. For svml128_atan2_f64, src1 is of double-precision floating-point type.",
        "src2: Floating-point value of the input vector.For svml128_atan2_f64, src2 is of double-precision floating-point type."
      ],
      "return": [
        "For atan2(y, x): An azimuth from the origin to the point (x, y) is returned, that is, the included angle with reference to the x-axis. The unit of the return value is radian, and the value range is (-π, +π].",
        "If y is ±0 and x is negative or -0, the return value is ±π.",
        "If y is ±0 and x is positive or +0, the return value is ±0.",
        "If y is ±∞ and x is a finite number, the return value is ±π/2.",
        "If y is ±∞ and x is –∞, the return value is ±3π/4.",
        "If y is ±∞ and x is +∞, the return value is ±π/4.",
        "If x is ±0 and y is negative, the return value is -π/2.",
        "If x is ±0 and y is positive, the return value is +π/2.",
        "If x is -∞ and y is finite positive, the return value is +π.",
        "If x is -∞ and y is finite negative, the return value is -π.",
        "If x is +∞ and y is finite positive, the return value is +0.",
        "If x is +∞ and y is finite negative, the return value is -0.",
        "If x or y is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_sinh_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_sinh_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sinh_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "Return the hyperbolic sine function value y of the radian angle x. y ∈ [-INF, +INF].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±INF.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_sinh_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正弦函数",
      "desc_en": "Computes the hyperbolic sine of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_sinh_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_sinh_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "Return the hyperbolic sine function value y of the radian angle x. y ∈ [-INF, +INF].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±INF.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_cosh_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_cosh_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_cosh_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The hyperbolic cosine y of the radian angle x is returned. y ∈ [+1, +INF].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is INF.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_cosh_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲余弦函数",
      "desc_en": "Computes the hyperbolic cosine of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_cosh_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_cosh_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The hyperbolic cosine y of the radian angle x is returned. y ∈ [+1, +INF].",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is +1.",
        "If the input is ±∞, the return value is INF.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_tanh_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_tanh_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_tanh_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The hyperbolic tangent y of the radian angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_tanh_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型双曲正切函数",
      "desc_en": "Computes the hyperbolic tangent of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_tanh_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_tanh_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The hyperbolic tangent y of the radian angle x is returned. y ∈ [-1, +1].",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If the input is ±∞, the return value is ±1.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_exp_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_exp_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_exp_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.. ptrfiff the input is -0, the return value is -1.",
        "If svml128_exp_f32 is greater than 0x1.62e42ep6f (about 88), +∞ is returned.",
        "If svml128_exp_f64 is greater than 0x1.62p9 (about 708), +∞ is returned.",
        "If the input of svml128_exp_f32 is less than -0x1.9fe368p6f (about –104), +0 is returned.",
        "If the input of svml128_exp_f64 is less than -0x1.62p9 (about –708), +0 is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_exp_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_exp_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_exp_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.. ptrfiff the input is -0, the return value is -1.",
        "If svml128_exp_f32 is greater than 0x1.62e42ep6f (about 88), +∞ is returned.",
        "If svml128_exp_f64 is greater than 0x1.62p9 (about 708), +∞ is returned.",
        "If the input of svml128_exp_f32 is less than -0x1.9fe368p6f (about –104), +0 is returned.",
        "If the input of svml128_exp_f64 is less than -0x1.62p9 (about –708), +0 is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_exp2_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_exp2_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_exp2_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-2 exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If input of expf is greater than 128, the return value is +∞.",
        "If the input of exp is greater than 1024, the return value is +∞.",
        "If the input of expf is less than -150, the return value is +0.",
        "If the input of exp is less than -1075, the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_exp2_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型指数函数(base 2)",
      "desc_en": "Computes the base-2 exponential of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_exp2_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_exp2_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-2 exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +1.",
        "If the input is -0, the return value is -1.",
        "If input of expf is greater than 128, the return value is +∞.",
        "If the input of exp is greater than 1024, the return value is +∞.",
        "If the input of expf is less than -150, the return value is +0.",
        "If the input of exp is less than -1075, the return value is +0.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_expm1_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_expm1_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_expm1_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If svml128_expm1_f32 is greater than 0x1.62e42ep6f (about 88), +∞ is returned.",
        "If svml128_expm1_f64 is greater than 0x1.62p9 (about 708), +∞ is returned.",
        "If the input of svml128_expm1_f32 is less than -0x1.9fe368p6f (about –104), +0 is returned.",
        "If the input of svml128_expm1_f64 is less than -0x1.62p9 (about –708), +0 is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_expm1_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型指数函数(base e)",
      "desc_en": "Computes the base-e exponential of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_expm1_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_expm1_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-e exponential y of x is returned. y ∈ (0, +INF).",
        "If the input is +0, the return value is +0.",
        "If the input is -0, the return value is -0.",
        "If svml128_expm1_f32 is greater than 0x1.62e42ep6f (about 88), +∞ is returned.",
        "If svml128_expm1_f64 is greater than 0x1.62p9 (about 708), +∞ is returned.",
        "If the input of svml128_expm1_f32 is less than -0x1.9fe368p6f (about –104), +0 is returned.",
        "If the input of svml128_expm1_f64 is less than -0x1.62p9 (about –708), +0 is returned.",
        "If the input is +∞, the return value is +∞.",
        "If the input is -∞, the return value is +0.",
        "If the input is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_log_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-e logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_log_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-e logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log10_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_log10_f32(float32x4_t src);",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log10_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-10 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log10_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base 10)",
      "desc_en": "Computes the base-10 logarithm of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_log10_f64(float64x2_t src);",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log10_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-10 logarithm y of x is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is -∞.",
        "If the input x is 1, the return value is 0.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log1p_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of single-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_log1p_f32(float32x4_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log1p_f32, src is of single-precision floating-point type."
      ],
      "return": [
        "The base-e logarithm y of (x + 1) is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is ±0.",
        "If the input x is -1, the return value is -INF.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_log1p_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型对数函数(base e)",
      "desc_en": "Computes the base-e logarithm of double-precision floating-point type vector",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_log1p_f64(float64x2_t src)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src: Floating-point value of the input vector. For svml128_log1p_f64, src is of double-precision floating-point type."
      ],
      "return": [
        "The base-e logarithm y of (x + 1) is returned. y ∈ (-INF, +INF).",
        "If the input x is ±0, the return value is ±0.",
        "If the input x is -1, the return value is -INF.",
        "If the input x is a negative number, the return value is NaN.",
        "If the input x is +∞, the return value is +∞.",
        "If the input x is NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_pow_f32",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "单精度浮点类型幂函数",
      "desc_en": "Raises single-precision floating-point type vector to the specified power",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float32x4_t svml128_pow_f32(float32x4_t src1, float32x4_t src2)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src1: Floating-point value of the input vector. For svml128_pow_f32, src1 is of single-precision floating-point type.",
        "src2: Floating-point value of the input vector. For svml128_pow_f32, src2 is of single-precision floating-point type."
      ],
      "return": [
        "If x is greater than 0 and y is a decimal, the result of x raised to the power of y is returned. The value range is r ∈ (-INF, +INF).",
        "If x is -0 and y is a negative odd number, the return value is -∞.",
        "If x is ±0 and y is a negative number, the return value is +∞.",
        "If x is ±0 and y is a positive odd number, the return value is ±0.",
        "If x is ±0, y is a positive number, the return value is +0.",
        "If x is ±1 and y is any value, the return value is +1.",
        "If x is any value and y is ±0, the return value is +1.",
        "If x is less than +0 and y is a decimal, the return value is NaN.",
        "If |x| is less than 1 and y is -∞, the return value is +∞.",
        "If |x| is greater than 1 and y is -∞, the return value is +0.",
        "If |x| is less than 1 and y is +∞, the return value is +0.",
        "If |x| is greater than 1 and y is +∞, the return value is +∞.",
        "If x is -∞ and y is a negative odd number, the return value is -0.",
        "If x is -∞ and y is a negative number, the return value is +0.",
        "If x is -∞ and y is a positive odd number, the return value is -∞.",
        "If x is -∞ and y is a positive number, the return value is +∞.",
        "If x is +∞ and y is less than +0, the return value is +0.",
        "If x is +∞ and y is greater than or equal to +0, the return value is +∞.",
        "If x and y are both large finite numbers, the return value is ±∞.",
        "If both x and y are NaN, the return value is NaN."
      ]
    },
    {
      "name": "svml128_pow_f64",
      "library": "KML_SVML",
      "headerfile": "",
      "desc_cn": "双精度浮点类型幂函数",
      "desc_en": "Raises double-precision floating-point type vector to the specified power",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "float64x2_t svml128_pow_f64(float64x2_t src1, float64x2_t src2)",
      "headerfile_desc": "ksvml.h",
      "parameters": [
        "src1: Floating-point value of the input vector. For svml128_pow_f64, src1 is of double-precision floating-point type.",
        "src2: Floating-point value of the input vector. For svml128_pow_f64, src2 is of double-precision floating-point type."
      ],
      "return": [
        "If x is greater than 0 and y is a decimal, the result of x raised to the power of y is returned. The value range is r ∈ (-INF, +INF).",
        "If x is -0 and y is a negative odd number, the return value is -∞.",
        "If x is ±0 and y is a negative number, the return value is +∞.",
        "If x is ±0 and y is a positive odd number, the return value is ±0.",
        "If x is ±0, y is a positive number, the return value is +0.",
        "If x is ±1 and y is any value, the return value is +1.",
        "If x is any value and y is ±0, the return value is +1.",
        "If x is less than +0 and y is a decimal, the return value is NaN.",
        "If |x| is less than 1 and y is -∞, the return value is +∞.",
        "If |x| is greater than 1 and y is -∞, the return value is +0.",
        "If |x| is less than 1 and y is +∞, the return value is +0.",
        "If |x| is greater than 1 and y is +∞, the return value is +∞.",
        "If x is -∞ and y is a negative odd number, the return value is -0.",
        "If x is -∞ and y is a negative number, the return value is +0.",
        "If x is -∞ and y is a positive odd number, the return value is -∞.",
        "If x is -∞ and y is a positive number, the return value is +∞.",
        "If x is +∞ and y is less than +0, the return value is +0.",
        "If x is +∞ and y is greater than or equal to +0, the return value is +∞.",
        "If x and y are both large finite numbers, the return value is ±∞.",
        "If both x and y are NaN, the return value is NaN."
      ]
    },
    {
      "name": "cblas_saxpy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量缩放与加和",
      "desc_en": "Vector scaling and summation",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_saxpy(const BLASINT n, const float alpha, const float *x, const BLASINT incx, float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "alpha: Multiplication coefficient. Single-precision floating-point type for saxpy.",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For saxpy, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For saxpy, y is of single-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_daxpy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量缩放与加和",
      "desc_en": "Vector scaling and summation",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_daxpy(const BLASINT n, const double alpha, const double *x, const BLASINT incx, double *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "alpha: Multiplication coefficient. Double-precision floating-point type for daxpy",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For daxpy, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For daxpy, y is of double-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_caxpy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量缩放与加和",
      "desc_en": "Vector scaling and summation",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_caxpy(const BLASINT n, const void *alpha, const void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "alpha: Multiplication coefficient. Single-precision complex type for caxpy.",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For caxpy, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For caxpy, y is of single-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_zaxpy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量缩放与加和",
      "desc_en": "Vector scaling and summation",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zaxpy(const BLASINT n, const void *alpha, const void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "alpha: Multiplication coefficient. Double-precision complex type for zaxpy.",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For zaxpy, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zaxpy, y is of double-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_sasum",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量元素绝对值求和",
      "desc_en": "Sum of absolute values of vector elements",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_sasum(const BLASINT n, const float *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For sasum, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns a float."
    },
    {
      "name": "cblas_dasum",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量元素绝对值求和",
      "desc_en": "Sum of absolute values of vector elements",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_dasum(const BLASINT n, const double *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For dasum, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns a float."
    },
    {
      "name": "cblas_scasum",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量元素绝对值求和",
      "desc_en": "Sum of absolute values of vector elements",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_scasum(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For scasum, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns a float."
    },
    {
      "name": "cblas_dzasum",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量元素绝对值求和",
      "desc_en": "Sum of absolute values of vector elements",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_dzasum(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For zasum, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns a double."
    },
    {
      "name": "cblas_scopy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量拷贝",
      "desc_en": "Vector copy",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_scopy(const BLASINT n, const float *x, const BLASINT incx, float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For scopy, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For scopy, y is of single-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_dcopy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量拷贝",
      "desc_en": "Vector copy",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dcopy(const BLASINT n, const double *x, const BLASINT incx, double *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For dcopy, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For dcopy, y is of double-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_ccopy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量拷贝",
      "desc_en": "Vector copy",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ccopy(const BLASINT n, const void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For ccopy, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For ccopy, y is of single-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_zcopy",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量拷贝",
      "desc_en": "Vector copy",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zcopy(const BLASINT n, const void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For zcopy, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zcopy, y is of double-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_sdot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量点积",
      "desc_en": "Vector dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_sdot(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy);",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For sdot, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For sdot, y is of single-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a vector point product result. it is of single-precision floating-point type."
    },
    {
      "name": "cblas_ddot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量点积",
      "desc_en": "Vector dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_ddot(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy);",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in vector x",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For ddot, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For ddot, y is of double-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a vector point product result. it is of double-precision floating-point type."
    },
    {
      "name": "cblas_cdotc",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型共轭向量与另一向量的点积",
      "desc_en": "Dot product of a conjugate vector and another vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float _Complex cblas_cdotc(const BLASINT n, const void *x, const BLASINT incx, const void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For cdotc, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zdotc, x is of double-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a single-precision complex number."
    },
    {
      "name": "cblas_zdotc",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型共轭向量与另一向量的点积",
      "desc_en": "Dot product of a conjugate vector and another vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double _Complex cblas_zdotc(const BLASINT n, const void *x, const BLASINT incx, const void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For cdotc, y is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zdotc, y is of double-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a double-precision complex number type."
    },
    {
      "name": "cblas_cdotu",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数向量点积",
      "desc_en": "Complex vector dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float _Complex cblas_cdotu(const BLASINT n, const void *x, const BLASINT incx, const void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For cdotu, y is of single-precision complex number type.",
        "incx: Increment for the elements in vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zdotu, y is of double-precision complex number type.",
        "incy: Increment for the elements in vector y"
      ],
      "return": "This function returns a double-precision complex number type."
    },
    {
      "name": "cblas_zdotu",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数向量点积",
      "desc_en": "Complex vector dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double _Complex cblas_zdotu(const BLASINT n, const void *x, const BLASINT incx, const void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For cdotu, y is of single-precision complex number type.",
        "incx: Increment for the elements in vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zdotu, y is of double-precision complex number type.",
        "incy: Increment for the elements in vector y"
      ],
      "return": "This function returns a double-precision complex number type."
    },
    {
      "name": "cblas_ssdot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型扩展精度点积",
      "desc_en": "Extended precision dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_sdsdot(const BLASINT n, const float alpha, const float *x, const BLASINT incx, const float *y, const BLASINT incy)",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "alpha: Single-precision scalar, which is added to the dot product and returned.",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)).",
        "incx: Increment of vector elements in x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)).",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a result of vector dot product. It is of single-precision type."
    },
    {
      "name": "cblas_dsdot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型扩展精度点积",
      "desc_en": "Extended precision dot product",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_dsdot(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)).",
        "incx: Increment of vector elements in x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)).",
        "incy: Increment for the elements of vector y"
      ],
      "return": "This function returns a result of vector dot product. It is of double-precision type."
    },
    {
      "name": "cblas_isamax",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量中最大绝对值的索引",
      "desc_en": "Index of the maximum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_isamax(const BLASINT n, const float *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For isamax, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index value of the maximum absolute value. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_idamax",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量中最大绝对值的索引",
      "desc_en": "Index of the maximum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_idamax(const BLASINT n, const double *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For idamax, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index value of the maximum absolute value. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_icamax",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量中最大绝对值的索引",
      "desc_en": "Index of the maximum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_icamax(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For icamax, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the maximum index value of the sum of the absolute value of the real part and the absolute value of the imaginary part. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_izamax",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量中最大绝对值的索引",
      "desc_en": "Index of the maximum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_izamax(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For izamax, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the maximum index value of the sum of the absolute value of the real part and the absolute value of the imaginary part. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_isamin",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量中最小绝对值的索引",
      "desc_en": "Index of the minimum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_isamin(const BLASINT n, const float *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For isamin, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index value of the minimum absolute value. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_idamin",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量中最小绝对值的索引",
      "desc_en": "Index of the minimum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_idamin(const BLASINT n, const double *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For idamin, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index value of the minimum absolute value. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_icamin",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量中最小绝对值的索引",
      "desc_en": "Index of the minimum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_icamin(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For icamin, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index of the element with the minimum sum of the absolute values of the real and imaginary parts. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_izamin",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量中最小绝对值的索引",
      "desc_en": "Index of the minimum absolute value in the vector",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "CBLAS_INDEX cblas_izamin(const BLASINT n, const void *x, const BLASINT incx)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For izamin, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x"
      ],
      "return": "This function returns the index of the element with the minimum sum of the absolute values of the real and imaginary parts. It is of the CBLAS_INDEX type."
    },
    {
      "name": "cblas_snrm2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "计算欧几里得范数",
      "desc_en": "Euclidean norm",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_snrm2 (const BLASINT N, const float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For snrm2, X is of single-precision floating-point type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": "This function returns the vector norm. It is of single-precision floating-point type."
    },
    {
      "name": "cblas_dnrm2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "计算欧几里得范数",
      "desc_en": "Euclidean norm",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_dnrm2 (const BLASINT N, const double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For dnrm2, X is of double-precision floating-point type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": "This function returns the vector norm. It is of double-precision floating-point type."
    },
    {
      "name": "cblas_scnrm2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "计算欧几里得范数",
      "desc_en": "Euclidean norm",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "float cblas_scnrm2 (const BLASINT N, const void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For scnrm2, X is of single-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": "This function returns the vector norm. It is of single-precision floating-point type."
    },
    {
      "name": "cblas_dznrm2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "计算欧几里得范数",
      "desc_en": "Euclidean norm",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "double cblas_dznrm2 (const BLASINT N, const void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For dznrm2, X is of double-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": "This function returns the vector norm. It is of double-precision floating-point type."
    },
    {
      "name": "cblas_srot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型基于平面上点旋转",
      "desc_en": "Rotation of points in the plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_srot(const BLASINT N, float *X, const BLASINT incX, float *Y, const BLASINT incY, const float c, const float s)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For srot, X is of single-precision floating-point type.",
        "incX: Increment for the elements of vector X",
        "Y: Vector Y. The vector size is at least (1+(n-1)*abs(incY)). For srot, Y is of single-precision floating-point type.",
        "incY: Increment for elements in vector Y",
        "c: Cosine of the rotation angle. For srot, c is of single-precision floating-point type.",
        "s: Sine of the rotation angle. For srot, s is of single-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_drot",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型基于平面上点旋转",
      "desc_en": "Rotation of points in the plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_drot(const BLASINT N, double *X, const BLASINT incX, double *Y, const BLASINT incY, const double c, const double s)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For drot, X is of double-precision floating-point type.",
        "incX: Increment for the elements of vector X",
        "Y: Vector Y. The vector size is at least (1+(n-1)*abs(incY)). For drot, Y is of double-precision floating-point type.",
        "incY: Increment for elements in vector Y",
        "c: Cosine of the rotation angle. For drot, c is of double-precision floating-point type.",
        "s: Sine of the rotation angle. For drot, s is of double-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_srotg",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型构造旋转平面",
      "desc_en": "Constructs the rotating plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_srotg(float *a, float *b, float *c, float *s)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "a: X coordinate of the Cartesian coordinate system. For srotg, a is of single-precision floating-point type.",
        "b: Y coordinate of the Cartesian coordinate system. For srotg, b is of single-precision floating-point type.",
        "c: Givens rotation parameter c. For srotg, c is of single-precision floating-point type.",
        "s: Givens rotation parameter s. For srotg, s is of single-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_drotg",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型构造旋转平面",
      "desc_en": "Constructs the rotating plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_drotg(double *a, double *b, double *c, double *s)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "a: X coordinate of the Cartesian coordinate system. For drotg, a is of double-precision floating-point type.",
        "b: Y coordinate of the Cartesian coordinate system. For drotg, b is of double-precision floating-point type.",
        "c: Givens rotation parameter c. For drotg, c is of double-precision floating-point type.",
        "s: Givens rotation parameter s. For drotg, s is of double-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_srotm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型修改旋转平面",
      "desc_en": "Modifies the rotation plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_srotm(const BLASINT N, float *X, const BLASINT incX, float *Y, const BLASINT incY, const float *P)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector. For srotm, X is of single-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For srotm, X is of single-precision floating-point type.",
        "incX: Increment for the elements of vector X",
        "Y: Vector Y. The vector size is at least (1+(n-1)*abs(incY)). For srotm, Y is of single-precision floating-point type.",
        "incY: Increment for elements in vector Y",
        "P: Conversion matrix H. For srotm, P is of single-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_drotm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型修改旋转平面",
      "desc_en": "Modifies the rotation plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_drotm(const BLASINT N, double *X, const BLASINT incX, double *Y, const BLASINT incY, const double *P)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in the X vector. For drotm, X is of double-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For drotm, X is of double-precision floating-point type.",
        "incX: Increment for the elements of vector X",
        "Y: Vector Y. The vector size is at least (1+(n-1)*abs(incY)). For drotm, Y is of double-precision floating-point type.",
        "incY: Increment for elements in vector Y",
        "P: Conversion matrix H. For drotm, P is of double-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_srotmg",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型构造修改的旋转平面",
      "desc_en": "Constructs the modified rotation plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "d1: For srotmg, d1 is of single-precision floating-point type.",
        "d2: For srotmg, d2 is of single-precision floating-point type.",
        "b1: For srotmg, b1 is of single-precision floating-point type.",
        "b2: For srotmg, b2 is of single-precision floating-point type.",
        "P: Conversion matrix H. For srotmg, P is of single-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_drotmg",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型构造修改的旋转平面",
      "desc_en": "Constructs the modified rotation plane",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "d1: For drotmg, d1 is of double-precision floating-point type.",
        "d2: For drotmg, d2 is of double-precision floating-point type.",
        "b1: For drotmg, b1 is of double-precision floating-point type.",
        "b2: For drotmg, b2 is of double-precision floating-point type.",
        "P: Conversion matrix H. For drotmg, P is of double-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_sscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sscal(const BLASINT N, const float alpha, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For sscal, alpha is of single-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For sscal, X is of single-precision floating-point type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_dscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dscal(const BLASINT N, const double alpha, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For dscal, alpha is of double-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For dscal, X is of double-precision floating-point type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_cscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cscal(const BLASINT N, const void *alpha, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For cscal, alpha is of single-precision complex number type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For cscal, X is of single-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_zscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zscal(const BLASINT N, const void *alpha, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For zscal, alpha is of double-precision complex number type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For zscal, X is of double-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_csscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_csscal(const BLASINT N, const float alpha, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For csscal, alpha is of single-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For csscal, X is of single-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_zdscal",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量缩放",
      "desc_en": "Vector scaling",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zdscal(const BLASINT N, const double alpha, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "N: Number of elements in vector X",
        "alpha: Coefficient. For zdscal, alpha is of double-precision floating-point type.",
        "X: Vector X. The vector size is at least (1+(n-1)*abs(incX)). For zdscal, X is of double-precision complex number type.",
        "incX: Increment for the elements of vector X"
      ],
      "return": ""
    },
    {
      "name": "cblas_sswap",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型两个向量元素交换",
      "desc_en": "Swaps the elements of two vectors",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sswap(const BLASINT n, float *x, const BLASINT incx, float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For sswap, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For sswap, y is of single-precision floating-point type.",
        "incy: Vector unit increment step in y"
      ],
      "return": ""
    },
    {
      "name": "cblas_dswap",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型两个向量元素交换",
      "desc_en": "Swaps the elements of two vectors",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dswap(const BLASINT n, double *x, const BLASINT incx, double *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For dswap, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For dswap, y is of double-precision floating-point type.",
        "incy: Vector unit increment step in y"
      ],
      "return": ""
    },
    {
      "name": "cblas_cswap",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型两个向量元素交换",
      "desc_en": "Swaps the elements of two vectors",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cswap(const BLASINT n, void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For cswap, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For cswap, y is of single-precision complex number type.",
        "incy: Vector unit increment step in y"
      ],
      "return": ""
    },
    {
      "name": "cblas_zswap",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型两个向量元素交换",
      "desc_en": "Swaps the elements of two vectors",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zswap(const BLASINT n, void *x, const BLASINT incx, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x and y vectors",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For zswap, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zswap, y is of double-precision complex number type.",
        "incy: Vector unit increment step in y"
      ],
      "return": ""
    },
    {
      "name": "cblas_sgbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量与带状矩阵乘积",
      "desc_en": "Product of a vector and a band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const BLASINT M, const BLASINT N, const BLASINT KL, const BLASINT KU, const float alpha, const float *A, const BLASINT lda, const float *X, const BLASINT incX, const float beta, float *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then y = alpha * A * x + beta * y. If TransA = CblasTrans, then y = alpha * A' * x + beta * y. If TransA = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "M: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "N: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "KL: Number of subdiagonals of A. The value of KL must be greater than or equal to 0.",
        "KU: Number of superdiagonals of A. The value of KU must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For sgbmv, alpha is of single-precision floating-point type.",
        "A: Band matrix. The matrix size is lda*N. For sgbmv, A is of single-precision floating-point type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (KL + KU + 1).",
        "X: Vector X. If TransA = CblasNoTrans, the vector size is at least (1+(N-1)*abs(incX)). Otherwise, the vector size must be at least (1+(M-1)*abs(incX)). For sgbmv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For sgbmv, beta is of single-precision floating-point type.",
        "Y: Vector Y. If TransA = CblasNoTrans, the vector size is at least (1+(M-1)*abs(incY)). Otherwise, the vector size is at least (1+(N-1)*abs(incY)). For sgbmv, Y is of single-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dgbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量与带状矩阵乘积",
      "desc_en": "Product of a vector and a band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const BLASINT M, const BLASINT N, const BLASINT KL, const BLASINT KU, const double alpha, const double *A, const BLASINT lda, const double *X, const BLASINT incX, const double beta, double *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then y = alpha * A * x + beta * y. If TransA = CblasTrans, then y = alpha * A' * x + beta * y. If TransA = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "M: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "N: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "KL: Number of subdiagonals of A. The value of KL must be greater than or equal to 0.",
        "KU: Number of superdiagonals of A. The value of KU must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For dgbmv, alpha is of double-precision floating-point type.",
        "A: Band matrix. The matrix size is lda*N. For dgbmv, A is of double-precision floating-point type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (KL + KU + 1).",
        "X: Vector X. If TransA = CblasNoTrans, the vector size is at least (1+(N-1)*abs(incX)). Otherwise, the vector size must be at least (1+(M-1)*abs(incX)). For dgbmv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For dgbmv, beta is of double-precision floating-point type.",
        "Y: Vector Y. If TransA = CblasNoTrans, the vector size is at least (1+(M-1)*abs(incY)). Otherwise, the vector size is at least (1+(N-1)*abs(incY)). For dgbmv, Y is of double-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_cgbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与带状矩阵乘积",
      "desc_en": "Product of a vector and a band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const BLASINT M, const BLASINT N, const BLASINT KL, const BLASINT KU, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then y = alpha * A * x + beta * y. If TransA = CblasTrans, then y = alpha * A' * x + beta * y. If TransA = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "M: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "N: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "KL: Number of subdiagonals of A. The value of KL must be greater than or equal to 0.",
        "KU: Number of superdiagonals of A. The value of KU must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For cgbmv, alpha is of single-precision complex number type.",
        "A: Band matrix. The matrix size is lda*N. For cgbmv, A is of single-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (KL + KU + 1).",
        "X: Vector X. If TransA = CblasNoTrans, the vector size is at least (1+(N-1)*abs(incX)). Otherwise, the vector size must be at least (1+(M-1)*abs(incX)). For cgbmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Vector Y. If TransA = CblasNoTrans, the vector size is at least (1+(M-1)*abs(incY)). Otherwise, the vector size is at least (1+(N-1)*abs(incY)). For cgbmv, Y is of single-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与带状矩阵乘积",
      "desc_en": "Product of a vector and a band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const BLASINT M, const BLASINT N, const BLASINT KL, const BLASINT KU, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, then y = alpha * A * x + beta * y. If TransA = CblasTrans, then y = alpha * A * x + beta * y. If TransA = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "M: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "N: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "KL: Subdiagonal order of A. The value of KL must be greater than or equal to 0.",
        "KU: Hyperdiagonal order of A. The value of KU must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For zgbmv, alpha is of double-precision complex number type.",
        "A: Band matrix. The matrix size is lda*N. For zgbmv, A is of double-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (KL + KU + 1).",
        "X: Vector X. If TransA = CblasNoTrans, the vector size is at least (1+(N-1)*abs(incX)). Otherwise, the vector size must be at least (1+(M-1)*abs(incX)). For zgbmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For zgbmv, beta is of double-precision complex number type.",
        "Y: Vector Y. If TransA = CblasNoTrans, the vector size is at least (1+(M-1)*abs(incY)). Otherwise, the vector size is at least (1+(N-1)*abs(incY)). For zgbmv, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_sgemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型向量与矩阵乘积",
      "desc_en": "Product of a vector and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const BLASINT m, const BLASINT n, const float alpha, const float *a, const BLASINT lda, const float *x, const BLASINT incx, const float beta, float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "trans: Operation of matrix A. If trans = CblasNoTrans, then y = alpha * A * x + beta * y. If trans = CblasTrans, then y = alpha * A * x + beta * y. If trans = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "m: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "n: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For sgemv, alpha is of single-precision floating-point type.",
        "a: Matrix. The matrix size is lda*n. For sgemv, a is of single-precision floating-point type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, m). Otherwise, lda must be greater than or equal to max(1, n).",
        "x: Vector x. If trans = CblasNoTrans, the vector size is at least (1+(n-1)*abs(incx)). Otherwise, the vector size must be at least (1+(m-1)*abs(incy)). For sgemv, x is of single-precision floating-point type.",
        "incx: Increment for elements in x. The value cannot be 0.",
        "beta: Multiplication coefficient. For sgemv, beta is of single-precision floating-point type.",
        "y: Vector y. If trans = CblasNoTrans, the vector size is at least (1+(m-1)*abs(incy)). Otherwise, the vector size is at least (1+(n-1)*abs(incy)). For zgbmv, y is of double-precision complex number type.",
        "incy: Increment for elements in y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dgemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型向量与矩阵乘积",
      "desc_en": "Product of a vector and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const BLASINT m, const BLASINT n, const double alpha, const double *a, const BLASINT lda, const double *x, const BLASINT incx, const double beta, double *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "trans: Operation of matrix A. If trans = CblasNoTrans, then y = alpha * A * x + beta * y. If trans = CblasTrans, then y = alpha * A * x + beta * y. If trans = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "m: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "n: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For dgemv, alpha is of double-precision floating-point type.",
        "a: Matrix. The matrix size is lda*n. For dgemv, a is of double-precision floating-point type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, m). Otherwise, lda must be greater than or equal to max(1, n).",
        "x: Vector x. If trans = CblasNoTrans, the vector size is at least (1+(n-1)*abs(incx)). Otherwise, the vector size must be at least (1+(m-1)*abs(incy)). For dgemv, x is of double-precision floating-point type.",
        "incx: Increment for elements in x. The value cannot be 0.",
        "beta: Multiplication coefficient. For dgemv, beta is of double-precision floating-point type.",
        "y: Vector y. If trans = CblasNoTrans, the vector size is at least (1+(m-1)*abs(incy)). Otherwise, the vector size is at least (1+(n-1)*abs(incy)). For dgemv, y is of double-precision floating-point type.",
        "incy: Increment for elements in y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_cgemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与矩阵乘积",
      "desc_en": "Product of a vector and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const BLASINT m, const BLASINT n, const void *alpha, const void *a, const BLASINT lda, const void *x, const BLASINT incx, const void *beta, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "trans: Operation of matrix A. If trans = CblasNoTrans, then y = alpha * A * x + beta * y. If trans = CblasTrans, then y = alpha * A * x + beta * y. If trans = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "m: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "n: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For cgemv, alpha is of single-precision complex number type.",
        "a: Matrix. The matrix size is lda*n. For cgemv, a is of single-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, m). Otherwise, lda must be greater than or equal to max(1, n).",
        "x: Vector x. If trans = CblasNoTrans, the vector size is at least (1+(n-1)*abs(incx)). Otherwise, the vector size must be at least (1+(m-1)*abs(incy)). For cgemv, x is of single-precision complex number type.",
        "incx: Increment for elements in x. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgemv, beta is of single-precision complex number type.",
        "y: Vector y. If trans = CblasNoTrans, the vector size is at least (1+(m-1)*abs(incy)). Otherwise, the vector size is at least (1+(n-1)*abs(incy)). For cgemv, y is of single-precision complex number type.",
        "incy: Increment for elements in y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与矩阵乘积",
      "desc_en": "Product of a vector and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const BLASINT m, const BLASINT n, const void *alpha, const void *a, const BLASINT lda, const void *x, const BLASINT incx, const void *beta, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "trans: Operation of matrix A. If trans = CblasNoTrans, then y = alpha * A * x + beta * y. If trans = CblasTrans, then y = alpha * A * x + beta * y. If trans = CblasConjTrans, then y = alpha * conjg(A') * x + beta * y.",
        "m: Number of rows in matrix A. The value of M must be greater than or equal to 0.",
        "n: Number of columns of matrix A. The value of N must be greater than or equal to 0.",
        "alpha: Multiplication coefficient. For zgemv, alpha is of double-precision complex number type.",
        "a: Matrix. The matrix size is lda*n. For zgemv, a is of double-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, m). Otherwise, lda must be greater than or equal to max(1, n).",
        "x: Vector x. If trans = CblasNoTrans, the vector size is at least (1+(n-1)*abs(incx)). Otherwise, the vector size must be at least (1+(m-1)*abs(incy)). For zgemv, x is of double-precision complex number type.",
        "incx: Increment for elements in x. The value cannot be 0.",
        "beta: Multiplication coefficient. For zgemv, beta is of double-precision complex number type.",
        "y: Vector y. If trans = CblasNoTrans, the vector size is at least (1+(m-1)*abs(incy)). Otherwise, the vector size is at least (1+(n-1)*abs(incy)). For zgemv, y is of double-precision complex number type.",
        "incy: Increment for elements in y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_sger",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度浮点类型一般矩阵秩1更新",
      "desc_en": "General matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sger (const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const float alpha, const float *X, const BLASINT incX, const float *Y, const BLASINT incY, float *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For sger, alpha is of single-precision floating-point type",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For sger, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For sger, Y is of single-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, n). For sger, A is of single-precision floating-point type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dger",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度浮点类型一般矩阵秩1更新",
      "desc_en": "General matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dger (const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const double alpha, const double *X, const BLASINT incX, const double *Y, const BLASINT incY, double *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For dger, alpha is of double-precision floating-point type",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For dger, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For dger, Y is of double-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For dger, A is of double-precision floating-point type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cgerc",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "一般单精度复数矩阵秩1更新",
      "desc_en": "General complex matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgerc(const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For cgerc, alpha is of single-precision complex number type.",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For cgerc, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For cgerc, Y is of single-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For cgerc, A is of single-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgerc",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "一般双精度复数矩阵秩1更新",
      "desc_en": "General complex matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgerc(const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For zgerc, alpha is of double-precision complex number type.",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For zgerc, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For zgerc, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For zgerc, A is of double-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cgeru",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型一般共轭矩阵秩1更新",
      "desc_en": "General conjugate matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgeru(const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For zgerc, alpha is of double-precision complex number type.",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For zgerc, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For zgerc, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For zgerc, A is of double-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgeru",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型一般共轭矩阵秩1更新",
      "desc_en": "General conjugate matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgeru(const enum CBLAS_ORDER order, const BLASINT M, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "M: Number of rows in matrix A",
        "N: Number of columns in matrix A",
        "alpha: Multiplication coefficient. For zgeru, alpha is of double-precision complex number type.",
        "X: Matrix X. The vector scale is at least (1+(M-1)*abs(incX)). For zgeru, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For cgbmv, beta is of single-precision complex number type.",
        "Y: Matrix Y. The vector size is at least (1+(N-1)*abs(incY)). For zgeru, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For zgeru, A is of double-precision complex number type.",
        "lda: Length of the primary dimension in matrix A. If A is a column-store matrix, lda must be greater than or equal to max(1, M). Otherwise, lda must be greater than or equal to max(1,N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_chbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与埃尔米特带状矩阵乘积",
      "desc_en": "Product of a vector and a Hermitian band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Super diagonal order of the matrix A. K must be greater than or equal to zero.",
        "alpha: Coefficient. For chbmv, alpha is of single-precision complex number type.",
        "A: Hermitian band matrix A(lda, N). For chbmv, A is of single-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For chbmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For chbmv, beta is of single-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For chbmv, Y is of single-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与埃尔米特带状矩阵乘积",
      "desc_en": "Product of a vector and a Hermitian band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Super diagonal order of the matrix A. K must be greater than or equal to zero.",
        "alpha: Coefficient. For zhbmv, alpha is of double-precision complex number type.",
        "A: Hermitian band matrix A(lda, N). For zhbmv, A is of double-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For zhbmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For zhbmv, beta is of double-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For zhbmv, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_chemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与埃尔米特矩阵乘积",
      "desc_en": "Product of a vector and a Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For chemv, alpha is of single-precision complex number type.",
        "A: Hermitian band matrix A(lda, N). For chemv, A is of single-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For chemv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For chemv, beta is of single-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For chemv, Y is of single-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhemv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与埃尔米特矩阵乘积",
      "desc_en": "Product of a vector and a Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For zhemv, alpha is of double-precision complex number type.",
        "A: Hermitian band matrix A(lda, N). For zhemv, A is of double-precision complex number type.",
        "lda: Length of the main dimension in matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For zhemv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For zhemv, beta is of double-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For zhemv, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_cher",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数埃尔米特矩阵秩1更新",
      "desc_en": "Complex Hermitian matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const void *X, const BLASINT incX, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo:  Storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For cher, alpha is of single-precision floating-point type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For cher, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "A: Matrix A(lda, N). For cher, A is of single-precision complex number type.",
        "lda: Length of the main dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zher",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数埃尔米特矩阵秩1更新",
      "desc_en": "Complex Hermitian matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const void *X, const BLASINT incX, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo:  Storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For zher, alpha is of double-precision floating-point type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For zher, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "A: Matrix A(lda, N). For zher, A is of double-precision complex number type.",
        "lda: Length of the main dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cher2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数埃尔米特矩阵秩2更新",
      "desc_en": "Complex Hermitian matrix rank 2 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For cher2, alpha is of single-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For cher2, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "Y: Matrix Y. The length must be at least 1+(n-1)*abs(incY). For cher2, Y is of single-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For cher2, A is of single-precision complex number type.",
        "lda:  Length of the main dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zher2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数埃尔米特矩阵秩2更新",
      "desc_en": "Complex Hermitian matrix rank 2 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For zher2, alpha is of double-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For zher2, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "Y: Matrix Y. The length must be at least 1+(n-1)*abs(incY). For zher2, Y is of double-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For zher2, A is of double-precision complex number type.",
        "lda:  Length of the main dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_chpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数向量与压缩的埃尔米特矩阵乘积",
      "desc_en": "Product of vector and compressed Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *Ap, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For chpmv, alpha is of single-precision complex number type.",
        "Ap: The size of a compressed Hermitian matrix is at least (N*(N+1)/2). For chpmv, Ap is of single-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For chpmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For chpmv, beta is of single-precision complex number type.",
        "Y:Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For chpmv, Y is of single-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数向量与压缩的埃尔米特矩阵乘积",
      "desc_en": "Product of vector and compressed Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *Ap, const void *X, const BLASINT incX, const void *beta, void *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For zhpmv, alpha is of double-precision complex number type.",
        "Ap: The size of a compressed Hermitian matrix is at least (N*(N+1)/2). For zhpmv, Ap is of double-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For zhpmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For zhpmv, beta is of double-precision complex number type.",
        "Y:Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For zhpmv, Y is of double-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_chpr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "三角展开的埃尔米特矩阵秩1更新",
      "desc_en": "Rank 1 update of triangularly expanded Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const void *X, const BLASINT incX, void *A)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For chpr, alpha is of single-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For chpr, X is of single-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "A: Matrix A. The matrix scale is at least (N*(N+1)/2). For chpr, A is of single-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhpr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "三角展开的埃尔米特矩阵秩1更新",
      "desc_en": "Rank 1 update of triangularly expanded Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const void *X,const BLASINT incX, void *A)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a Hermitian matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For zhpr, alpha is of double-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For zhpr, X is of double-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "A: Matrix A. The matrix scale is at least (N*(N+1)/2). For zhpr, A is of double-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_chpr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "三角展开的埃尔米特矩阵秩2更新",
      "desc_en": "Rank 2 update of triangularly expanded Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *Ap)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Hermitian matrix expansion storage mode (upper triangle expansion or lower triangle expansion). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For chpr2, alpha is of single-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For chpr2, X is of single-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Matrix Y. The length must be at least 1+(N-1)*abs(incY). For cher2, Y is of single-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "Ap: Matrix A triangle storage. The matrix scale is at least (N*(N+1)/2). For chpr2, Ap is of single-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhpr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "三角展开的埃尔米特矩阵秩2更新",
      "desc_en": "Rank 2 update of triangularly expanded Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const void *alpha, const void *X, const BLASINT incX, const void *Y, const BLASINT incY, void *Ap)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Hermitian matrix expansion storage mode (upper triangle expansion or lower triangle expansion). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For zhpr2, alpha is of double-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For zhpr2, X is of double-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Matrix Y. The length must be at least 1+(N-1)*abs(incY). For zher2, Y is of double-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "Ap: Matrix A triangle storage. The matrix scale is at least (N*(N+1)/2). For zhpr2, Ap is of double-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度向量与对称带状矩阵乘积",
      "desc_en": "Product of a vector and a symmetric band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const BLASINT K, const float alpha, const float *A, const BLASINT lda, const float *X, const BLASINT incX, const float beta, float *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Super diagonal order of the matrix A. K must be greater than or equal to zero.",
        "alpha: Multiplication coefficient. For ssbmv, alpha is of single-precision floating-point type.",
        "A: Symmetric band-shaped matrix. The matrix scale is lda*n. For ssbmv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ssbmv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For ssbmv, beta is of single-precision floating-point type.",
        "Y: Matrix Y. The length must be at least 1+(N-1)*abs(incY). For zher2, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度向量与对称带状矩阵乘积",
      "desc_en": "Product of a vector and a symmetric band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const BLASINT K, const double alpha, const double *A, const BLASINT lda, const double *X, const BLASINT incX, const double beta, double *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Super diagonal order of the matrix A. K must be greater than or equal to zero.",
        "alpha: Multiplication coefficient. For dsbmv, alpha is of double-precision floating-point type.",
        "A: Symmetric band-shaped matrix. For dsbmv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dsbmv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For dsbmv, beta is of double-precision floating-point type.",
        "Y: Matrix Y. The length must be at least 1+(N-1)*abs(incY). For dsbmv, Y is of double-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_sspmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度向量与压缩的对称矩阵乘积",
      "desc_en": "Product of vector and compressed symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *Ap, const float *X, const BLASINT incX, const float beta, float *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Multiplication coefficient. For sspmv, alpha is of single-precision complex number type.",
        "Ap: The size of a compressed symmetric matrix is at least (N*(N+1)/2). For sspmv, Ap is of single-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For sspmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For sspmv, beta is of single-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For sspmv, Y is of single-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dspmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度向量与压缩的对称矩阵乘积",
      "desc_en": "Product of vector and compressed symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *Ap, const double *X, const BLASINT incX, const double beta, double *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Multiplication coefficient. For dspmv, alpha is of double-precision complex number type.",
        "Ap: The size of a compressed symmetric matrix is at least (N*(N+1)/2). For dspmv, Ap is of double-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dspmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For dspmv, beta is of double-precision complex number type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For dspmv, Y is of double-precision complex number type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_sspr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "对称展开矩阵秩1更新",
      "desc_en": "Rank 1 update of symmetric expansion matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *X, const BLASINT incX, float *Ap)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a symmetric matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For sspr, alpha is of single-precision floating-point type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For sspr, X is of single-precision floating-point type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Ap: Matrix A. For sspr, Ap is of single-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_dspr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "对称展开矩阵秩1更新",
      "desc_en": "Rank 1 update of symmetric expansion matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *X, const BLASINT incX, double *Ap)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a symmetric matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For dspr, alpha is of double-precision floating-point type.",
        "X: Matrix X. The length must be at least 1+(N-1)*abs(incX). For dspr, X is of double-precision floating point type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Ap: Matrix A. For dspr, Ap is of double-precision floating-point type."
      ],
      "return": ""
    },
    {
      "name": "cblas_sspr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "对称展开矩阵秩2更新",
      "desc_en": "Rank 2 update of symmetric expansion matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *X, const BLASINT incX, const float *Y, const BLASINT incY, float *A)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a symmetric matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For sspr2, alpha is of single-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For sspr2, X is of single-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For sspr2, Y is of single-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Triangle storage of matrix A. For sspr2, A is of single-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_dspr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "对称展开矩阵秩2更新",
      "desc_en": "Rank 2 update of symmetric expansion matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *X, const BLASINT incX, const double *Y, const BLASINT incY, double *A)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Expansion storage mode of a symmetric matrix (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "N: Number of elements in vector X",
        "alpha: Multiplication coefficient. For dspr2, alpha is of double-precision complex number type.",
        "X: Matrix X. The length must be at least 1+(n-1)*abs(incX). For dspr2, X is of double-precision complex number type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For dspr2, Y is of double-precision complex number type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Triangle storage of matrix A. For dspr2, A is of double-precision complex number type."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssymv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度向量与对称矩阵乘积",
      "desc_en": "Product of a vector and a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *A, const BLASINT lda, const float *X, const BLASINT incX, const float beta, float *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For ssymv, alpha is of single-precision floating-point type.",
        "A: Symmetric matrix A(lda, N). For ssymv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ssymv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For ssymv, beta is of single-precision floating-point type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For ssymv, Y is of single-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsymv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度向量与对称矩阵乘积",
      "desc_en": "Product of a vector and a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *A, const BLASINT lda, const double *X, const BLASINT incX, const double beta, double *Y, const BLASINT incY)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "alpha: Coefficient. For dsymv, alpha is of double-precision floating-point type.",
        "A: Symmetric matrix A(lda, N). For dsymv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dsymv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "beta: Multiplication coefficient. For dsymv, beta is of double-precision floating-point type.",
        "Y: Vector Y. The vector scale is at least (1+(N-1)*abs(incY)). For dsymv, Y is of double-precision floating-point type.",
        "incY: Increment for elements in Y. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssyr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度对称矩阵秩1更新",
      "desc_en": "Symmetric matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *X, const BLASINT incX, float *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of matrix A, which is also the number of elements in vector X.",
        "alpha: Multiplication coefficient. For ssyr, alpha is of single-precision floating-point type.",
        "X: Vector X. The length must be at least 1+(N-1)*abs(incX). For ssyr, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "A: Matrix A(lda, N). For ssyr, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsyr",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度对称矩阵秩1更新",
      "desc_en": "Symmetric matrix rank 1 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *X, const BLASINT incX, double *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of matrix A, which is also the number of elements in vector X.",
        "alpha: Multiplication coefficient. For dsyr, alpha is of double-precision floating-point type.",
        "X: Vector X. The length must be at least 1+(N-1)*abs(incX). For dsyr, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0.",
        "A: Matrix A(lda, N). For dsyr, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssyr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度对称矩阵秩2更新",
      "desc_en": "Symmetric matrix rank 2 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const float alpha, const float *X, const BLASINT incX, const float *Y, const BLASINT incY, float *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of matrix A, which is also the number of elements in vectors X and Y.",
        "alpha: Multiplication coefficient. For ssyr2, alpha is of single-precision floating-point type.",
        "X: Vector X. The length must be at least 1+(N-1)*abs(incX). For ssyr2, X is of single-precision floating-point type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Vector Y. The length must be at least 1+(N-1)*abs(incY). For ssyr2, Y is of single-precision floating-point type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For ssyr2, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsyr2",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度对称矩阵秩2更新",
      "desc_en": "Symmetric matrix rank 2 update",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const BLASINT N, const double alpha, const double *X, const BLASINT incX, const double *Y, const BLASINT incY, double *A, const BLASINT lda)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "N: Order of matrix A, which is also the number of elements in vectors X and Y.",
        "alpha: Multiplication coefficient. For dsyr2, alpha is of double-precision floating-point type.",
        "X: Vector X. The length must be at least 1+(N-1)*abs(incX). For dsyr2, X is of double-precision floating-point type.",
        "incX: Increment for elements in vector X. The value cannot be 0.",
        "Y: Vector Y. The length must be at least 1+(N-1)*abs(incY). For dsyr2, Y is of double-precision floating-point type.",
        "incY: Increment for elements in vector Y. The value cannot be 0.",
        "A: Matrix A(lda, N). For dsyr2, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_stbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型向量与三角带状矩阵乘积",
      "desc_en": "Product of a vector and a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const BLASINT K, const float *A, const BLASINT lda, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For stbmv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For stbmv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型向量与三角带状矩阵乘积",
      "desc_en": "Product of a vector and a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const BLASINT K, const double *A, const BLASINT lda, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For dtbmv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtbmv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与三角带状矩阵乘积",
      "desc_en": "Product of a vector and a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const BLASINT K, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For ctbmv, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctbmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztbmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与三角带状矩阵乘积",
      "desc_en": "Product of a vector and a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const BLASINT K, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For ztbmv, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztbmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_stbsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型线性方程组求解，系数矩阵为三角带状矩阵",
      "desc_en": "Solves linear equations. The coefficient matrix is a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const blasint K, const float *A, const blasint lda, float *X, const blasint incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, the system A * X = B is solved. If TransA = CblasTrans, the system A' * X = B is solved. If TransA = CblasConjTrans, the system conjg(A') * X = B is solved. B is the right-hand side passed in through X, which is overwritten with the solution.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For stbsv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For stbsv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtbsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型线性方程组求解，系数矩阵为三角带状矩阵",
      "desc_en": "Solves linear equations. The coefficient matrix is a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const blasint K, const double *A, const blasint lda, double *X, const blasint incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, the system A * X = B is solved. If TransA = CblasTrans, the system A' * X = B is solved. If TransA = CblasConjTrans, the system conjg(A') * X = B is solved. B is the right-hand side passed in through X, which is overwritten with the solution.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For dtbsv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtbsv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctbsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型线性方程组求解，系数矩阵为三角带状矩阵",
      "desc_en": "Solves linear equations. The coefficient matrix is a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const blasint K, const void *A, const blasint lda, void *X, const blasint incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, the system A * X = B is solved. If TransA = CblasTrans, the system A' * X = B is solved. If TransA = CblasConjTrans, the system conjg(A') * X = B is solved. B is the right-hand side passed in through X, which is overwritten with the solution.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For ctbsv, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctbsv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztbsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型线性方程组求解，系数矩阵为三角带状矩阵",
      "desc_en": "Solves linear equations. The coefficient matrix is a triangular band matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const blasint N, const blasint K, const void *A, const blasint lda, void *X, const blasint incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether matrix A is an upper triangular matrix or a lower triangular matrix. If Uplo = CblasUpper, A is an upper triangular matrix. If Uplo = CblasLower, A is the lower triangular matrix.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, the system A * X = B is solved. If TransA = CblasTrans, the system A' * X = B is solved. If TransA = CblasConjTrans, the system conjg(A') * X = B is solved. B is the right-hand side passed in through X, which is overwritten with the solution.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "K: Number of super-diagonals (if Uplo = CblasUpper) or sub-diagonals (if Uplo = CblasLower) of the matrix A. K must be greater than or equal to zero.",
        "A: Triangular band matrix A(lda, N). For ztbsv, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (K + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztbsv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_stpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型向量与压缩的三角矩阵乘积",
      "desc_en": "Product of a vector and a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const float *Ap, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: Compressed triangular matrix. The size is at least (N*(N+1)/2). For stpmv, Ap is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For stpmv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型向量与压缩的三角矩阵乘积",
      "desc_en": "Product of a vector and a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const double *Ap, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: Compressed triangular matrix. The size is at least (N*(N+1)/2). For dtpmv, Ap is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtpmv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与压缩的三角矩阵乘积",
      "desc_en": "Product of a vector and a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *Ap, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate transpose matrix. If TransA = CblasNoTrans, then X = A * X. If TransA = CblasTrans, then X = A' * X. If TransA = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: Compressed triangular matrix. The size is at least (N*(N+1)/2). For ctpmv, Ap is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctpmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztpmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与压缩的三角矩阵乘积",
      "desc_en": "Product of a vector and a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *Ap, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: The size of a compressed triangular matrix is at least (N*(N+1)/2). For ztpmv, Ap is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to (k + 1).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztpmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_stpsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型线性方程组求解，稀疏矩阵是压缩的三角矩阵",
      "desc_en": "For solving linear equations, the sparse matrix is a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const float *Ap, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: The size of a compressed triangular matrix is at least (N*(N+1)/2). For stpsv, Ap is of single-precision floating-point type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). The output is updated after the equation is solved. For stpsv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtpsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型线性方程组求解，稀疏矩阵是压缩的三角矩阵",
      "desc_en": "For solving linear equations, the sparse matrix is a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const double *Ap, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: The size of a compressed triangular matrix is at least (N*(N+1)/2). For dtpsv, Ap is of double-precision floating-point type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtpsv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctpsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型线性方程组求解，稀疏矩阵是压缩的三角矩阵",
      "desc_en": "For solving linear equations, the sparse matrix is a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *Ap, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: The size of a compressed triangular matrix is at least (N*(N+1)/2). For ctpsv, Ap is of single-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctpsv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztpsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型线性方程组求解，稀疏矩阵是压缩的三角矩阵",
      "desc_en": "For solving linear equations, the sparse matrix is a compressed triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *Ap, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage expansion mode of matrix A (upper triangle or lower triangle). If Uplo = CblasUpper, the upper triangle of A is used for expansion. If Uplo = CblasLower, the lower triangle of A is used for expansion.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "Ap: The size of a compressed triangular matrix is at least (N*(N+1)/2). For ztpsv, Ap is of double-precision complex number type.",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztpsv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_strmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型向量与三角矩阵乘积",
      "desc_en": "Product of a vector and a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const float *A, const BLASINT lda, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For strmv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For strmv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtrmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型向量与三角矩阵乘积",
      "desc_en": "Product of a vector and a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const double *A, const BLASINT lda, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For dtrmv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtrmv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctrmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量与三角矩阵乘积",
      "desc_en": "Product of a vector and a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For ctrmv, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctrmv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztrmv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量与三角矩阵乘积",
      "desc_en": "Product of a vector and a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For ztrmv, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztrmv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_strsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型线性方程组求解，稀疏矩阵是三角矩阵",
      "desc_en": "Solves linear equations. The sparse matrix is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const float *A, const BLASINT lda, float *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For strsv, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For strsv, X is of single-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtrsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型线性方程组求解，稀疏矩阵是三角矩阵",
      "desc_en": "Solves linear equations. The sparse matrix is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const double *A, const BLASINT lda, double *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For dtrsv, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For dtrsv, X is of double-precision floating-point type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctrsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型线性方程组求解，稀疏矩阵是三角矩阵",
      "desc_en": "Solves linear equations. The sparse matrix is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For ctrsv, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ctrsv, X is of single-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztrsv",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型线性方程组求解，稀疏矩阵是三角矩阵",
      "desc_en": "Solves linear equations. The sparse matrix is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT N, const void *A, const BLASINT lda, void *X, const BLASINT incX)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Indicates whether to use the upper triangle or lower triangle of matrix A. If Uplo = CblasUpper, the upper triangle of A is used. If Uplo = CblasLower, the lower triangle of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, then X = A * X. If trans = CblasTrans, then X = A * X. If trans = CblasConjTrans, then X = conjg(A') * X.",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "N: Order of the matrix A. N must be greater than or equal to zero.",
        "A: Triangular matrix. The minimum matrix size is lda*N. For ztrsv, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "X: Vector X. The vector scale is at least (1+(N-1)*abs(incX)). For ztrsv, X is of double-precision complex number type.",
        "incX: Increment for elements in X. The value cannot be 0."
      ],
      "return": ""
    },
    {
      "name": "cblas_sgemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型一般矩阵乘矩阵",
      "desc_en": "General matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const float alpha, const float *A, const BLASINT lda, const float *B, const BLASINT ldb, const float beta, float *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, then op(A) = A. If TransA = CblasTrans, then op(A) = A'. If TransA = CblasConjTrans, then op(A) = conjg(A').",
        "TransB: The matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransB = CblasNoTrans, then op(B) = B. If TransB = CblasTrans, then op(B) = B'. If TransB = CblasConjTrans, then op(B) = conjg(B').",
        "M: Rows of matrices op(A) and C",
        "N: Columns of matrices op(B) and C",
        "K: Columns of the matrix op(A) and rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For sgemm, alpha is of single-precision floating-point type.",
        "A: Matrix A. For sgemm, A is of single-precision floating-point type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, K); otherwise, lda is at least max(1, M).",
        "B: Matrix B. For sgemm, B is of single-precision floating-point type.",
        "ldb: If the matrix is column store and TransB = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If the matrix is row store and TransB = CblasNoTrans, ldb is at least max(1, N); otherwise, ldb is at least max(1, K).",
        "beta: Multiplication coefficient. For sgemm, beta is of single-precision floating-point type.",
        "C: Matrix C. For sgemm, C is of single-precision floating-point type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dgemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型一般矩阵乘矩阵",
      "desc_en": "General matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const double alpha, const double *A, const BLASINT lda, const double *B, const BLASINT ldb, const double beta, double *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, then op(A) = A. If TransA = CblasTrans, then op(A) = A'. If TransA = CblasConjTrans, then op(A) = conjg(A').",
        "TransB: The matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransB = CblasNoTrans, then op(B) = B. If TransB = CblasTrans, then op(B) = B'. If TransB = CblasConjTrans, then op(B) = conjg(B').",
        "M: Rows of matrices op(A) and C",
        "N: Columns of matrices op(B) and C",
        "K: Columns of the matrix op(A) and rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For dgemm, alpha is of double-precision floating-point type.",
        "A: Matrix A. For dgemm, A is of double-precision floating-point type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, K); otherwise, lda is at least max(1, M).",
        "B: Matrix B. For dgemm, B is of double-precision floating-point type.",
        "ldb: If the matrix is column store and TransB = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If the matrix is row store and TransB = CblasNoTrans, ldb is at least max(1, N); otherwise, ldb is at least max(1, K).",
        "beta: Multiplication coefficient. For dgemm, beta is of double-precision floating-point type.",
        "C: Matrix C. For dgemm, C is of double-precision floating-point type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cgemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型一般矩阵乘矩阵",
      "desc_en": "General matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, then op(A) = A. If TransA = CblasTrans, then op(A) = A'. If TransA = CblasConjTrans, then op(A) = conjg(A').",
        "TransB: The matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransB = CblasNoTrans, then op(B) = B. If TransB = CblasTrans, then op(B) = B'. If TransB = CblasConjTrans, then op(B) = conjg(B').",
        "M: Rows of matrices op(A) and C",
        "N: Columns of matrices op(B) and C",
        "K: Columns of the matrix op(A) and rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For cgemm, alpha is of single-precision complex number type.",
        "A: Matrix A. For cgemm, A is of single-precision complex number type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, K); otherwise, lda is at least max(1, M).",
        "B: Matrix B. For cgemm, B is of single-precision complex number type.",
        "ldb: If the matrix is column store and TransB = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If the matrix is row store and TransB = CblasNoTrans, ldb is at least max(1, N); otherwise, ldb is at least max(1, K).",
        "beta: Multiplication coefficient. For cgemm, beta is of single-precision complex number type.",
        "C: Matrix C. For cgemm, C is of single-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型一般矩阵乘矩阵",
      "desc_en": "General matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, then op(A) = A. If TransA = CblasTrans, then op(A) = A'. If TransA = CblasConjTrans, then op(A) = conjg(A').",
        "TransB: Indicates whether the matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransB = CblasNoTrans, then op(B) = B. If TransB = CblasTrans, then op(B) = B'. If TransB = CblasConjTrans, then op(B) = conjg(B').",
        "M: Rows of matrices op(A) and C",
        "N: Columns of matrices op(B) and C",
        "K: Columns of the matrix op(A) and rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For zgemm, alpha is of double-precision complex number type.",
        "A: Matrix A. For zgemm, A is of double-precision complex number type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, K); otherwise, lda is at least max(1, M).",
        "B: Matrix B. For zgemm, B is of double-precision complex number type.",
        "ldb: If the matrix is column store and TransB = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If the matrix is row store and TransB = CblasNoTrans, ldb is at least max(1, N); otherwise, ldb is at least max(1, K).",
        "beta: Multiplication coefficient. For zgemm, beta is of double-precision complex number type.",
        "C: Matrix C. For zgemm, C is of double-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_chemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型埃尔米特矩阵乘积",
      "desc_en": "Complex Hermitian matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether Hermitian matrix A is on the left or right side of matrix B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For chemm, alpha is of single-precision complex number type.",
        "A: Hermitian matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For chemm, A is of single-precision complex number type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For chemm, B is of single-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For chemm, beta is of single-precision complex number type.",
        "C: Matrix C (ldc, N). For chemm, C is of single-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zhemm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型埃尔米特矩阵乘积",
      "desc_en": "Complex Hermitian matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether Hermitian matrix A is on the left or right side of matrix B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For zhemm, alpha is of double-precision complex number type.",
        "A: Hermitian matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For zhemm, A is of double-precision complex number type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For zhemm, B is of double-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For zhemm, beta is of double-precision complex number type.",
        "C: Matrix C (ldc, N). For zhemm, C is of double-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cher2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵乘矩阵秩2更新，矩阵C为复数埃尔米特矩阵",
      "desc_en": "Complex matrix-matrix multiplication rank 2 update. The matrix C is a complex Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const float beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. When Trans = CblasNoTrans, C = alpha * A * B ^ H + conj(alpha) * B * A ^ H + beta * C. If Trans = CblasConjTrans, then C = alpha * A ^ H * B + conj(alpha) * B ^ H * A + beta * C.",
        "N: Number of rows of matrix C.",
        "K: Indicates the columns of matrix A and matrix B when Trans = CblasNoTrans. Indicates the rows of matrix A and matrix B when Trans = CblasConjTrans.",
        "alpha: Multiplication coefficient. For cher2k, alpha is of single-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For cher2k, A is of single-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N); otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K); otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For cher2k, B is of single-precision complex number type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N); otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For cher2k, beta is of single-precision real number type.",
        "C: Matrix C (ldc, N). For cher2k, C is of single-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zher2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵乘矩阵秩2更新，矩阵C为复数埃尔米特矩阵",
      "desc_en": "Complex matrix-matrix multiplication rank 2 update. The matrix C is a complex Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const double beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. When Trans = CblasNoTrans, C = alpha * A * B ^ H + conj(alpha) * B * A ^ H + beta * C. If Trans = CblasConjTrans, then C = alpha * A ^ H * B + conj(alpha) * B ^ H * A + beta * C.",
        "N: Number of rows of matrix C.",
        "K: Indicates the columns of matrix A and matrix B when Trans = CblasNoTrans. Indicates the rows of matrix A and matrix B when Trans = CblasConjTrans.",
        "alpha: Multiplication coefficient. For zher2k, alpha is of double-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For zher2k, A is of double-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N); otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K); otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For zher2k, B is of double-precision complex number type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N); otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For zher2k, beta is of double-precision real number type.",
        "C: Matrix C (ldc, N). For zher2k, C is of double-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_cherk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵乘矩阵秩k更新，矩阵C为单精度复数埃尔米特矩阵",
      "desc_en": "Complex matrix-matrix multiplication rank k update. The matrix C is a float-complex Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const float alpha, const void *A, const BLASINT lda, const float beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrix A is a conventional matrix or a conjugate matrix. When Trans = CblasNoTrans, C = alpha * A * A ^ H + beta * C. If Trans = CblasConjTrans, then C = alpha * A ^ H * A + beta * C.",
        "N: Order of matrix C",
        "K: Number of columns of matrix A when Trans = CblasNoTrans. Number of rows of matrix A when Trans = CblasConjTrans.",
        "alpha: Multiplication coefficient. For cherk, alpha is of single-precision real number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For cherk, A is of single-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N); otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For cherk, beta is of single-precision real number type.",
        "C: Matrix C (ldc, N). For cherk, C is of single-precision complex number type.",
        "ldc: ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zherk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵乘矩阵秩k更新，矩阵C为双精度复数埃尔米特矩阵",
      "desc_en": "Complex matrix-matrix multiplication rank k update. The matrix C is a double-complex Hermitian matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const double alpha, const void *A, const BLASINT lda, const double beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrix A is a conventional matrix or a conjugate matrix. When Trans = CblasNoTrans, C = alpha * A * A ^ H + beta * C. If Trans = CblasConjTrans, then C = alpha * A ^ H * A + beta * C.",
        "N: Order of matrix C",
        "K: Number of columns of matrix A when Trans = CblasNoTrans. Number of rows of matrix A when Trans = CblasConjTrans.",
        "alpha: Multiplication coefficient. For zherk, alpha is of double-precision real number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For zherk, A is of double-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N); otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For zherk, beta is of double-precision real number type.",
        "C: Matrix C (ldc, N). For zherk, C is of the double-precision complex number type.",
        "ldc: ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssymm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型对称矩阵乘矩阵",
      "desc_en": "Symmetric matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const float alpha, const float *A, const BLASINT lda, const float *B, const BLASINT ldb, const float beta, float *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether symmetric matrix A is on the left or right of matrix B. If Side = CblasLeft, C = alpha * A * B + beta * C. If Side = CblasRight, C = alpha * B * A + beta * C.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For ssymm, alpha is of single-precision floating-point type.",
        "A: Matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For ssymm, A is of single-precision floating-point type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For ssymm, B is of single-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For ssymm, beta is of single-precision floating-point type.",
        "C: Matrix C. For ssymm, C is of single-precision floating-point type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsymm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型对称矩阵乘矩阵",
      "desc_en": "Symmetric matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const double alpha, const double *A, const BLASINT lda, const double *B, const BLASINT ldb, const double beta, double *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether symmetric matrix A is on the left or right of matrix B. If Side = CblasLeft, C = alpha * A * B + beta * C. If Side = CblasRight, C = alpha * B * A + beta * C.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For dsymm, alpha is of double-precision floating-point type.",
        "A: Matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For dsymm, A is of double-precision floating-point type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For dsymm, B is of double-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For dsymm, beta is of double-precision floating-point type.",
        "C: Matrix C. For dsymm, C is of the double-precision floating-point type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_csymm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型对称矩阵乘矩阵",
      "desc_en": "Symmetric matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether symmetric matrix A is on the left or right of matrix B. If Side = CblasLeft, C = alpha * A * B + beta * C. If Side = CblasRight, C = alpha * B * A + beta * C.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For csymm, alpha is of single-precision complex number type.",
        "A: Matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For csymm, A is of single-precision complex number type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For csymm, B is of single-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For csymm, beta is of single-precision complex number type.",
        "C: Matrix C. For csymm, C is of the single-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zsymm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型对称矩阵乘矩阵",
      "desc_en": "Symmetric matrix-matrix multiplication",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether symmetric matrix A is on the left or right of matrix B. If Side = CblasLeft, C = alpha * A * B + beta * C. If Side = CblasRight, C = alpha * B * A + beta * C.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "M: Number of rows of matrix C. A*B indicates the number of rows of matrix A. B*A indicates the number of rows of matrix B.",
        "N: Number of columns of matrix C. A*B indicates the number of columns of matrix B. B*A indicates the number of columns of matrix A.",
        "alpha: Multiplication coefficient. For zsymm, alpha is of double-precision complex number type.",
        "A: Matrix A (lda, ka). If Side = CblasLeft, ka = M; otherwise, ka = N. For zsymm, A is of double-precision complex number type.",
        "lda: If Side = CblasLeft, lda is at least max(1, M); otherwise, lda is at least max(1, N).",
        "B: Matrix B (ldb, N). For zsymm, B is of double-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). If the matrix is row store, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For zsymm, beta is of double-precision complex number type.",
        "C: Matrix C. For zsymm, C is of double-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). If the matrix is row store, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssyrk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型矩阵乘矩阵秩K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const float alpha, const float *A, const BLASINT lda, const float beta, float *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If Trans = CblasNoTrans, C = α * A * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns of matrix A; otherwise, K indicates the number of rows of matrix A.",
        "alpha: Multiplication coefficient. For ssyrk, alpha is of single-precision floating-point type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For ssyrk, A is of single-precision floating-point type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For ssyrk, beta is of single-precision floating-point type.",
        "C: Symmetric matrix C. For ssyrk, C is of single-precision floating-point type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsyrk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型矩阵乘矩阵秩K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const double alpha, const double *A, const BLASINT lda, const double beta, double *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If Trans = CblasNoTrans, C = α * A * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns of matrix A; otherwise, K indicates the number of rows of matrix A.",
        "alpha: Multiplication coefficient. For dsyrk, alpha is of double-precision floating-point type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For dsyrk, A is of double-precision floating-point type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For dsyrk, beta is of double-precision floating-point type.",
        "C: Symmetric matrix C. For dsyrk, C is of double-precision floating-point type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_csyrk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵乘矩阵秩K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: Indicates whether matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If Trans = CblasNoTrans, C = α * A * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns of matrix A; otherwise, K indicates the number of rows of matrix A.",
        "alpha: Multiplication coefficient. For csyrk, alpha is of single-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For csyrk, A is of single-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For csyrk, beta is of single-precision complex number type.",
        "C: Symmetric matrix C. For csyrk, C is of single-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zsyrk",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵乘矩阵秩K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: Indicates whether matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If Trans = CblasNoTrans, C = α * A * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns of matrix A; otherwise, K indicates the number of rows of matrix A.",
        "alpha: Multiplication coefficient. For zsyrk, alpha is of double-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For zsyrk, A is of the double-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "beta: Multiplication coefficient. For zsyrk, beta is of double-precision complex number type.",
        "C: Symmetric matrix C. For zsyrk, C is of double-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ssyr2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型矩阵乘矩阵秩2K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank 2k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const float alpha, const float *A, const BLASINT lda, const float *B, const BLASINT ldb, const float beta, float *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. If Trans = CblasNoTrans, C = α * A * B ^ T + α * B * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * B + α * B ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns in matrices A and B; otherwise, K indicates the number of rows in matrices A and B.",
        "alpha: Multiplication coefficient. For ssyr2k, alpha is of single-precision floating-point type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For ssyr2k, A is of single-precision floating-point type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For ssyr2k, B is of single-precision floating-point type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N). Otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For ssyr2k, beta is of single-precision floating-point type.",
        "C: Symmetric matrix C. For ssyr2k, C is of single-precision floating-point type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dsyr2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型矩阵乘矩阵秩2K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank 2k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const double alpha, const double *A, const BLASINT lda, const double *B, const BLASINT ldb, const double beta, double *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. If Trans = CblasNoTrans, C = α * A * B ^ T + α * B * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * B + α * B ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns in matrices A and B; otherwise, K indicates the number of rows in matrices A and B.",
        "alpha: Multiplication coefficient. For dsyr2k, alpha is of double-precision floating-point type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For dsyr2k, A is of double-precision floating-point type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For dsyr2k, B is of double-precision floating-point type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N). Otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For dsyr2k, beta is of double-precision floating-point type.",
        "C: Symmetric matrix C. For dsyr2k, C is of double-precision floating-point type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_csyr2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵乘矩阵秩2K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank 2k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. If Trans = CblasNoTrans, C = α * A * B ^ T + α * B * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * B + α * B ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns in matrices A and B; otherwise, K indicates the number of rows in matrices A and B.",
        "alpha: Multiplication coefficient. For csyr2k, alpha is of single-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For csyr2k, A is of single-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For csyr2k, B is of single-precision complex number type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N). Otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For csyr2k, beta is of single-precision complex number type.",
        "C: Symmetric matrix C. For csyr2k, C is of single-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zsyr2k",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵乘矩阵秩2K更新，矩阵C为对称矩阵",
      "desc_en": "Matrix-matrix multiplication rank 2k update. The matrix C is a symmetric matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Uplo: Storage mode of matrix C",
        "Trans: The matrices A and B are conventional matrices, transpose matrices, or conjugate matrices. If Trans = CblasNoTrans, C = α * A * B ^ T + α * B * A ^ T + β * C. If Trans = CblasTrans/CblasConjTrans, C = α * A ^ T * B + α * B ^ T * A + β * C.",
        "N: Order of matrix C",
        "K: If Trans = CblasNoTrans, K indicates the number of columns in matrices A and B; otherwise, K indicates the number of rows in matrices A and B.",
        "alpha: Multiplication coefficient. For zsyr2k, alpha is of double-precision complex number type.",
        "A: Matrix A (lda, ka). If Trans = CblasNoTrans, ka = K; otherwise, ka = N. For zsyr2k, A is of double-precision complex number type.",
        "lda: If the matrix is column store and Trans = CblasNoTrans, lda must be at least max(1, N). Otherwise, lda must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, lda must be at least max(1, K). Otherwise, lda must be at least max(1, N).",
        "B: Matrix B (ldb, kb). If Trans = CblasNoTrans, kb = K; otherwise, kb = N. For zsyr2k, B is of double-precision complex number type.",
        "ldb: If the matrix is column store and Trans = CblasNoTrans, ldb must be at least max(1, N). Otherwise, ldb must be at least max(1, K). If the matrix is row store and Trans = CblasNoTrans, ldb must be at least max(1, K). Otherwise, ldb must be at least max(1, N).",
        "beta: Multiplication coefficient. For zsyr2k, beta is of double-precision complex number type.",
        "C: Symmetric matrix C. For zsyr2k, C is of double-precision complex number type.",
        "ldc: The value of ldc is at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_strmm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型矩阵乘矩阵,其中一个矩阵为三角矩阵",
      "desc_en": "Matrix-matrix multiplication. One of the matrices is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether matrix A is on the left or right of matrix B in the equation. If Side = CblasLeft, B = alpha * op(A) * B. If Side = CblasRight, B = alpha * B * op(A).",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional, transpose, or conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether the matrix is a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For strmm, alpha is of single-precision floating-point type.",
        "A: Matrix (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For strmm, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",
        "B: Matrix B. For strmm, B is of single-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtrmm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型矩阵乘矩阵,其中一个矩阵为三角矩阵",
      "desc_en": "Matrix-matrix multiplication. One of the matrices is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether matrix A is on the left or right of matrix B in the equation. If Side = CblasLeft, B = alpha * op(A) * B. If Side = CblasRight, B = alpha * B * op(A).",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional, transpose, or conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether the matrix is a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For dtrmm, alpha is of double-precision floating-point type.",
        "A: Matrix (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For dtrmm, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",
        "B: Matrix B. For dtrmm, B is of double-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctrmm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵乘矩阵,其中一个矩阵为三角矩阵",
      "desc_en": "Matrix-matrix multiplication. One of the matrices is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, void *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether matrix A is on the left or right of matrix B in the equation. If Side = CblasLeft, B = alpha * op(A) * B. If Side = CblasRight, B = alpha * B * op(A).",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional, transpose, or conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether the matrix is a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For ctrmm, alpha is of single-precision complex number type.",
        "A: Matrix (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For ctrmm, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",
        "B: Matrix B. For ctrmm, B is of single-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztrmm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵乘矩阵,其中一个矩阵为三角矩阵",
      "desc_en": "Matrix-matrix multiplication. One of the matrices is a triangular matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, void *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether matrix A is on the left or right of matrix B in the equation. If Side = CblasLeft, B = alpha * op(A) * B. If Side = CblasRight, B = alpha * B * op(A).",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional, transpose, or conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether the matrix is a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For ztrmm, alpha is of double-precision complex number type.",
        "A: Matrix (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For ztrmm, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",
        "B: Matrix B. For ztrmm, B is of the double-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_strsm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型三角矩阵方程求解",
      "desc_en": "Triangular matrix equation solution",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether op(A) is on the left or right of X. If Side = CblasLeft, op(A) * X = alpha * B. If Side = CblasRight, X * op(A) = alpha * B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For strsm, alpha is of single-precision floating-point type.",
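        "A: Triangular matrix A (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For strsm, A is of single-precision floating-point type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",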
        "B: Matrix B. For strsm, B is of single-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_dtrsm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型三角矩阵方程求解",
      "desc_en": "Triangular matrix equation solution",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether op(A) is on the left or right of X. If Side = CblasLeft, op(A) * X = alpha * B. If Side = CblasRight, X * op(A) = alpha * B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For dtrsm, alpha is of double-precision floating-point type.",
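        "A: Triangular matrix A (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For dtrsm, A is of double-precision floating-point type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",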
        "B: Matrix B. For dtrsm, B is of double-precision floating-point type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ctrsm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型三角矩阵方程求解",
      "desc_en": "Triangular matrix equation solution",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, void *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether op(A) is on the left or right of X. If Side = CblasLeft, op(A) * X = alpha * B. If Side = CblasRight, X * op(A) = alpha * B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether A is a unit triangular matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For ctrsm, alpha is of single-precision complex number type.",
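        "A: Triangular matrix A (lda, k). If Side = CblasLeft, k = M. If Side = CblasRight, k = N. For ctrsm, A is of single-precision complex number type.",
        "lda: Leading dimension of matrix A. If Side = CblasLeft, lda must be at least max(1, M). If Side = CblasRight, lda must be at least max(1, N).",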
        "B: Matrix B. For ctrsm, B is of single-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_ztrsm",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型三角矩阵方程求解",
      "desc_en": "Triangular matrix equation solution",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const BLASINT M, const BLASINT N, const void *alpha, const void *A, const BLASINT lda, void *B, const BLASINT ldb)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "Side: Indicates whether op(A) is on the left or right of X. If Side = CblasLeft, op(A) * X = alpha * B. If Side = CblasRight, X * op(A) = alpha * B.",
        "Uplo: Indicates whether the upper triangle or the lower triangle of the matrix A is used. If Uplo = CblasUpper, the upper triangular part of A is used. If Uplo = CblasLower, the lower triangular part of A is used.",
        "TranA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If trans = CblasNoTrans, op(A) = A. If trans = CblasTrans, op(A) = A'. If trans = CblasConjTrans, op(A) = conjg(A').",
        "Diag: Indicates whether A is a unit trigonometric matrix. If Diag = CblasUnit, A is a unit triangular matrix. If Diag = CblasNonUnit, A is not a unit triangular matrix.",
        "M: Number of rows of matrix B",
        "N: Number of columns of matrix B",
        "alpha: Multiplication coefficient. For ztrsm, alpha is of double-precision complex number type.",
        "A: Triangular matrix A (lda, N). For ztrsm, A is of double-precision complex number type.",
        "lda: Leading dimension of matrix A. The value of lda must be greater than or equal to max(1, N).",
        "B: Matrix B. For ztrsm, B is of double-precision complex number type.",
        "ldb: If the matrix is column store, ldb must be at least max(1, M). Otherwise, ldb must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_saxpby",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度类型向量分别缩放与加和",
      "desc_en": "Vector scaling and summation separately",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_saxpby(const BLASINT n, const float alpha, const float *x, const BLASINT incx, const float beta, float *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "alpha: Multiplication coefficient. Single-precision floating-point type for saxpby.",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For saxpby, x is of single-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "beta: Multiplication coefficient. Single-precision floating-point type for saxpby",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For saxpby, y is of single-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_daxpby",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度类型向量分别缩放与加和",
      "desc_en": "Vector scaling and summation separately",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_daxpby(const BLASINT n, const double alpha, const double *x, const BLASINT incx, const double beta, double *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "alpha: Multiplication coefficient. Double-precision floating-point type for daxpby",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For daxpby, x is of double-precision floating-point type.",
        "incx: Increment for the elements of vector x",
        "beta: Multiplication coefficient. Double-precision floating-point type for daxpby",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For daxpby, y is of double-precision floating-point type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_caxpby",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型向量分别缩放与加和",
      "desc_en": "Vector scaling and summation separately",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_caxpby(const BLASINT n, const void *alpha, const void *x, const BLASINT incx, const void *beta, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "alpha: Multiplication coefficient. Single-precision complex number type for caxpby",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For caxpby, x is of single-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "beta: Multiplication coefficient. Single-precision complex number type for caxpby",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For caxpby, y is of single-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_zaxpby",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型向量分别缩放与加和",
      "desc_en": "Vector scaling and summation separately",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zaxpby(const BLASINT n, const void *alpha, const void *x, const BLASINT incx, const void *beta, void *y, const BLASINT incy)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "n: Number of elements in the x vector",
        "alpha: Multiplication coefficient. Double-precision complex number type for zaxpby",
        "x: Vector x. The vector size is at least (1+(n-1)*abs(incx)). For zaxpby, x is of double-precision complex number type.",
        "incx: Increment for the elements of vector x",
        "beta: Multiplication coefficient. Double-precision complex number type for zaxpby",
        "y: Vector y. The vector size is at least (1+(n-1)*abs(incy)). For zaxpby, y is of double-precision complex number type.",
        "incy: Increment for the elements of vector y"
      ],
      "return": ""
    },
    {
      "name": "cblas_cgemm3m",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "一般单精度复数矩阵乘矩阵",
      "desc_en": "Product of a general single-precision complex matrix and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_cgemm3m(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "TransB: Indicates whether the matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(B) = B. If TransA = CblasTrans, op(B) = B'. If TransA = CblasConjTrans, op(B) = conjg(B').",
        "M: Number of rows of matrices op(A) and C",
        "N: Number of columns of matrices op(B) and C",
        "K: Number of columns of the matrix op(A) and the number of rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For cgemm3m, alpha is of single-precision complex number type.",
        "A: Matrix A. For cgemm3m, A is of single-precision complex number type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K).",
        "B: Matrix B. For cgemm3m, B is of single-precision complex number type.",
        "ldb: If the matrix is column store and TransA = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, N); otherwise, lda is at least max(1, K).",
        "beta: Multiplication coefficient. For cgemm3m, beta is of single-precision complex number type.",
        "C: Matrix C. For cgemm3m, C is of single-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "cblas_zgemm3m",
      "library": "KML_BLAS",
      "headerfile": "",
      "desc_cn": "一般双精度复数矩阵乘矩阵",
      "desc_en": "Product of a general double-precision complex matrix and a matrix",
      "benefit_cn": "基于鲲鹏架构，通过向量化、数据预取、编译优化、数据重排等手段，实现性能优化",
      "benefit_en": "Based on the Kunpeng architecture, performance is optimized through vectorization, data prefetching, compilation optimization, and data rearrangement.",
      "func_name": "void cblas_zgemm3m(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const BLASINT M, const BLASINT N, const BLASINT K, const void *alpha, const void *A, const BLASINT lda, const void *B, const BLASINT ldb, const void *beta, void *C, const BLASINT ldc)",
      "headerfile_desc": "kblas.h",
      "parameters": [
        "order: Indicates whether the matrix is in row- or column-major order.",
        "TransA: Indicates whether the matrix A is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(A) = A. If TransA = CblasTrans, op(A) = A'. If TransA = CblasConjTrans, op(A) = conjg(A').",
        "TransB: Indicates whether the matrix B is a conventional matrix, a transpose matrix, or a conjugate matrix. If TransA = CblasNoTrans, op(B) = B. If TransA = CblasTrans, op(B) = B'. If TransA = CblasConjTrans, op(B) = conjg(B').",
        "M: Number of rows of matrices op(A) and C",
        "N: Number of columns of matrices op(B) and C",
        "K: Number of columns of the matrix op(A) and the number of rows of the matrix op(B)",
        "alpha: Multiplication coefficient. For zgemm3m, alpha is of double-precision complex number type.",
        "A: Matrix A. For zgemm3m, A is of double-precision complex number type.",
        "lda: If the matrix is column store and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, M); otherwise, lda is at least max(1, K).",
        "B: Matrix B. For zgemm3m, B is of double-precision complex number type.",
        "ldb: If the matrix is column store and TransA = CblasNoTrans, ldb is at least max(1, K); otherwise, ldb is at least max(1, N). If A is a row-store matrix and TransA = CblasNoTrans, lda is at least max(1, N); otherwise, lda is at least max(1, K).",
        "beta: Multiplication coefficient. For zgemm3m, beta is of double-precision complex number type.",
        "C: Matrix C. For zgemm3m, C is of double-precision complex number type.",
        "ldc: If the matrix is column store, ldc must be at least max(1, M). Otherwise, ldc must be at least max(1, N)."
      ],
      "return": ""
    },
    {
      "name": "kml_sparse_saxpyi",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，稀疏向量与标量相乘，并将结果加到另一向量上",
      "desc_en": "single-precision real type, computes the product of a sparse vector and a scalar and stores the product to another vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供了高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_saxpyi(const KML_INT nz, const float a, const float *x, const KML_INT *indx, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "a: Scalar a. For saxpyi, a is of single-precision floating-point type.",
        "x: Array x for storing non-zero elements. The size is at least nz. For saxpyi, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For saxpyi, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_daxpyi",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，稀疏向量与标量相乘，并将结果加到另一向量上",
      "desc_en": "double-precision real type, computes the product of a sparse vector and a scalar and stores the product to another vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供了高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_daxpyi(const KML_INT nz, const double a, const double *x, const KML_INT *indx, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "a: Scalar a. For daxpyi, a is of double-precision floating-point type.",
        "x: Array x for storing non-zero elements. The size is at least nz. For daxpyi, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For daxpyi, y is a double-precision floating-point array.."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_caxpyi",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，稀疏向量与标量相乘，并将结果加到另一向量上",
      "desc_en": "single-precision complex type, computes the product of a sparse vector and a scalar and stores the product to another vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_caxpyi(const KML_INT nz, const KML_Complex8 a, const KML_Complex8 *x, const KML_INT *indx, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "a: Scalar a. For caxpyi, a is a single-precision complex number.",
        "x: Array x for storing non-zero elements. The size is at least nz. For caxpyi, x is a single-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For caxpyi, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zaxpyi",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，稀疏向量与标量相乘，并将结果加到另一向量上",
      "desc_en": "double-precision complex type, computes the product of a sparse vector and a scalar and stores the product to another vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zaxpyi(const KML_INT nz, const KML_Complex16 a, const KML_Complex16 *x, const KML_INT *indx, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "a: Scalar a. For zaxpyi, a is a double-precision complex number.",
        "x: Array x for storing non-zero elements. The size is at least nz. For zaxpyi, x is a double-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For zaxpyi, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_sdoti",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型稀疏向量点积",
      "desc_en": "Computes the dot product of single-precision real type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供了高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_sdoti(const KML_INT nz, const float *x,const KML_INT *indx, const float *y, float *doti)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For sdoti, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For sdoti, y is a single-precision floating-point array.",
        "doti: Dot product of x and y. For sdoti, doti is of the single-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ddoti",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型稀疏向量点积",
      "desc_en": "Computes the dot product of double-precision real type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供了高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ddoti(const KML_INT nz, const double *x,const KML_INT *indx, const double *y, double *doti)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For ddoti, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For ddoti, y is a double-precision floating-point array.",
        "doti: Dot product of x and y. For ddoti, doti is of the double-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_cdotci_sub",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型稀疏向量共轭点积",
      "desc_en": "Computes the dot conjugate product of single-precision complex type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_cdotci_sub(const KML_INT nz, const KML_Complex8 *x, const KML_INT *indx, const KML_Complex8 *y, KML_Complex8 *dotci)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For cdotci, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For cdotci, y is a single-precision floating-point array.",
        "dotci: Returns the conjugate dot product of x and y. For cdotci, dotci is of the single-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zdotci_sub",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型稀疏向量共轭点积",
      "desc_en": "Computes the conjugate dot product of double-precision complex type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zdotci_sub(const KML_INT nz, const KML_Complex16 *x, const KML_INT *indx, const KML_Complex16 *y, KML_Complex16 *dotci)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For zdotci, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For zdotci, y is a double-precision floating-point array.",
        "dotci: Returns the conjugate dot product of x and y. For zdotci, dotci is of the double-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_cdotui_sub",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数域稀疏向量非共轭点积",
      "desc_en": "Computes the dot non-conjugate product of single-precision complex sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_cdotui_sub(const KML_INT nz, const KML_Complex8 *x, const KML_INT *indx, const KML_Complex8 *y, KML_Complex8 *dotui)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For cdotui, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For cdotui, y is a single-precision floating-point array.",
        "doti: Dot product of x and y. For cdotui, dotui is of the single-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zdotui_sub",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数域稀疏向量非共轭点积",
      "desc_en": "Computes the non-conjugate dot product of double-precision complex sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zdotui_sub(const KML_INT nz, const KML_Complex16 *x, const KML_INT *indx,const KML_Complex16 *y, KML_Complex16 *dotui)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Array x for storing non-zero elements. The size is at least nz. For zdotui, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Array y. The size is at least max(indx[i]). For zdotui, y is a double-precision floating-point array.",
        "dotui: Dot product of x and y. For zdotui, dotui is of the double-precision floating-point type."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_sgthr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，将全存储稀疏向量中的指定元素加载到压缩格式的稀疏向量中",
      "desc_en": "single-precision real type, Gathers the specified elements of a full-storage vector into a compressed sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_sgthr(const KML_INT nz, const float *y, float *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For sgthr, y is a single-precision floating-point array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For sgthr, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dgthr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，将全存储向量中的指定元素加载到压缩格式的稀疏向量中",
      "desc_en": "double-precision real type, Gathers the specified elements of a full-storage vector into a compressed sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dgthr(const KML_INT nz, const double *y, double *x,const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For dgthr, y is a double-precision floating-point array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For dgthr, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_cgthr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，将全存储向量中的指定元素加载到压缩格式的稀疏向量中",
      "desc_en": "single-precision complex type, Gathers the specified elements of a full-storage vector into a compressed sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_cgthr(const KML_INT nz, const KML_Complex8 *y, KML_Complex16 *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For cgthr, y is a single-precision complex number array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For cgthr, x is a single-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zgthr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，将全存储稀疏向量中的指定元素加载到压缩格式的向量中",
      "desc_en": "double-precision complex type, Gathers the specified elements of a full-storage sparse vector into a compressed vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zgthr(const KML_INT nz, const KML_Complex16 *y, KML_Complex16 *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For zgthr, y is a double-precision complex number array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For zgthr, x is a double-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_sgthrz",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，将全存储向量中的指定元素加载到压缩格式的向量中，并将全存储向量元素清零",
      "desc_en": "single-precision real type, Gathers the specified elements of a full-storage vector into a compressed sparse vector, and zeroes out these elements in the full-storage vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_sgthrz(const KML_INT nz, float *y, float *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For sgthrz, y is a single-precision floating-point array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For sgthrz, x is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dgthrz",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，将全存储向量中的指定元素加载到压缩格式的稀疏向量中，并将全存储向量元素清零",
      "desc_en": "double-precision real type, Gathers the specified elements of a full-storage vector into a compressed sparse vector, and zeroes out these elements in the full-storage vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dgthrz(const KML_INT nz, double *y, double *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For dgthrz, y is a double-precision floating-point array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For dgthrz, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_cgthrz",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，将全存储向量中的指定元素加载到压缩格式的稀疏向量中，并将全存储向量元素清零",
      "desc_en": "single-precision complex type, Gathers the specified elements of a full-storage vector into a compressed sparse vector, and zeroes out these elements in the full-storage vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_cgthrz(const KML_INT nz, KML_Complex8 *y, KML_Complex8 *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For cgthrz, y is a single-precision complex number array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For cgthrz, x is a single-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zgthrz",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，将全存储向量中的指定元素加载到压缩格式的稀疏向量中，并将全存储向量元素清零",
      "desc_en": "double-precision complex type, Gathers the specified elements of a full-storage vector into a compressed sparse vector, and zeroes out these elements in the full-storage vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zgthrz(const KML_INT nz, KML_Complex16 *y, KML_Complex16 *x, const KML_INT *indx)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "y: Array y. The size is at least max(indx[i]). For zgthrz, y is a double-precision complex number array.",
        "x: Array x for storing non-zero elements. The size is at least nz. For zgthrz, x is a double-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_sroti",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "对两个单精度实数类型稀疏向量进行旋转",
      "desc_en": "Rotates two single-precision real type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_sroti(const KML_INT nz, float *x, const KML_INT *indx, float *y, const float c, const float s)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Vector x in compressed form. The size is at least nz. For sroti, x is a single-precision floating-point array.",
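        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Vector y in full-storage form. The size is at least max(indx[i]). For sroti, y is a single-precision floating-point array.",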
        "c: Scalar c. For sroti, c is a single-precision floating-point number.",
        "s: Scalar s. For sroti, s is a single-precision floating-point number."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_droti",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "对两个双精度实数类型稀疏向量进行旋转",
      "desc_en": "Rotates two double-precision real type sparse vectors",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_droti(const KML_INT nz, double *x, const KML_INT *indx, double *y, const double c, const double s)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays",
        "x: Vector x in compressed form. The size is at least nz. For droti, x is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz.",
        "y: Vector y in full-storage form. The size is at least max(indx[i]). For droti, y is a double-precision floating-point array.",
        "c: Scalar c. For droti, c is a double-precision floating-point number.",
        "s: Scalar s. For droti, s is a double-precision floating-point number."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ssctr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "将压缩格式的单精度类型向量写入全存储稀疏向量的指定位置",
      "desc_en": "Writes a compressed single-precision type vector to the specified location of a full-storage sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ssctr(const KML_INT nz, const float *x, const KML_INT *indx, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays.",
        "x: Array x for storing non-zero elements. The size is at least nz. For ssctr, x is a single-precision floating-point array.",
        "y: Array y. The size is at least max(indx[i]). For ssctr, y is a single-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dsctr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "将压缩格式的双精度类型向量写入全存储稀疏向量的指定位置",
      "desc_en": "Writes a compressed double-precision type vector to the specified location of a full-storage sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dsctr(const KML_INT nz, const double *x, const KML_INT *indx, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays.",
        "x: Array x for storing non-zero elements. The size is at least nz. For dsctr, x is a double-precision floating-point array.",
        "y: Array y. The size is at least max(indx[i]). For dsctr, y is a double-precision floating-point array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_csctr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "将压缩格式的单精度复数类型向量写入全存储稀疏向量的指定位置",
      "desc_en": "Writes a compressed single-precision complex type vector to the specified location of a full-storage sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_csctr(const KML_INT nz, const KML_Complex8 *x, const KML_INT *indx, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays.",
        "x: Array x for storing non-zero elements. The size is at least nz. For csctr, x is a single-precision complex number array.",
        "y: Array y. The size is at least max(indx[i]). For csctr, y is a single-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zsctr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "将压缩格式的双精度复数类型向量写入全存储稀疏向量的指定位置",
      "desc_en": "Writes a compressed double-precision complex type vector to the specified location of a full-storage sparse vector",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zsctr(const KML_INT nz, const KML_Complex16 *x, const KML_INT *indx, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "nz: Number of elements in the x and indx arrays.",
        "x: Array x for storing non-zero elements. The size is at least nz. For zsctr, x is a double-precision complex number array.",
        "y: Array y. The size is at least max(indx[i]). For zsctr, y is a double-precision complex number array.",
        "indx: indx[i] indicates the serial number of the ith element in the x array in the dense vector. The array size is at least nz."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏通常矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * X. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * X. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * X.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Array values for storing non-zero elements in matrix A",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Array of vector x",
        "y: Array of vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏通常矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * X. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * X. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * X.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Array values for storing non-zero elements in matrix A",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Array of vector x",
        "y: Array of vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏通常矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * X. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * X. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * X.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Array values for storing non-zero elements in matrix A",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Array of vector x",
        "y: Array of vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏通常矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * X. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * X. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * X.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Array values for storing non-zero elements in matrix A",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Array of vector x",
        "y: Array of vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle L: lower triangle",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle L: lower triangle",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle L: lower triangle",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle L: lower triangle",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "single-precision real type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char diag, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A^T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A^H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For scsrtrsv, val is a single-precision floating-point array.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For scsrtrsv, x is a single-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For scsrtrsv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "double-precision real type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char diag, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A^T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A^H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For dcsrtrsv, val is a double-precision floating-point array.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For dcsrtrsv, x is a double-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For dcsrtrsv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "single-precision complex type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char *diag, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A^T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A^H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For ccsrtrsv, val is a single-precision complex number array.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For ccsrtrsv, x is a single-precision complex number array.",
        "y: Vector y. This parameter is output after update. For ccsrtrsv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基1索引的稀疏对称矩阵",
      "desc_en": "double-precision complex type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with one-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char *diag, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A^T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A^H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For zcsrtrsv, val is a double-precision complex number array.",
        "ia: Vector whose length is m+1. ia[i]-1 indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For zcsrtrsv, x is a double-precision complex number array.",
        "y: Vector y. This parameter is output after update. For zcsrtrsv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_scsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏通常矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_scsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * x.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏通常矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_dcsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * x.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏通常矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_ccsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * x.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrgemv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏通常矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse general matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_zcsrgemv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = A * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = A^T * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = A^H * x.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_scsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_scsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_dcsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_ccsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrsymv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_zcsrsymv(const kml_sparse_fill_mode_t uplo, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x",
        "y: Vector y, which is output after being updated."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_scsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "single-precision real type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_scsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char diag, const KML_INT m, const float *a, const KML_INT *ia, const KML_INT *ja, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A ^ T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A ^ H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For c_scsrtrsv, a is a single-precision floating-point array.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For c_scsrtrsv, x is a single-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For c_scsrtrsv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "double-precision real type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_dcsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char diag, const KML_INT m, const double *a, const KML_INT *ia, const KML_INT *ja, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A ^ T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A ^ H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For c_dcsrtrsv, a is a double-precision floating-point array.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For c_dcsrtrsv, x is a double-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For c_dcsrtrsv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "single-precision complex type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_ccsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char *diag, const KML_INT m, const KML_Complex8 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A ^ T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A ^ H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For c_ccsrtrsv, a is a single-precision complex number array.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For c_ccsrtrsv, x is a single-precision complex number array.",
        "y: Vector y. This parameter is output after update. For c_ccsrtrsv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrtrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解三角矩阵方程组计算，矩阵是CSR格式(三数组)基0索引的稀疏对称矩阵",
      "desc_en": "double-precision complex type, Triangular matrix equation solution. The matrix is a sparse symmetric matrix stored in the CSR format(3-array variation) with zero-based indexing",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_csparse_zcsrtrsv(const char uplo, const kml_sparse_operation_t opt, const char *diag, const KML_INT m, const KML_Complex16 *a, const KML_INT *ia, const KML_INT *ja, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "uplo: Indicates whether the upper triangle or lower triangle of matrix A is used. U: upper triangle, L: lower triangle.",
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then x = A * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then x = A ^ T * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then x = A ^ H * y.",
        "diag: Whether the diagonal element is a unit element. U: unit element, N: non-unit element.",
        "m: Number of rows and columns in matrix A. The value range is [1, MAX_KML_INT].",
        "a: Non-zero elements in matrix A. For c_zcsrtrsv, a is a double-precision complex number array.",
        "ia: Vector whose length is m+1. ia[i] indicates the index of the first non-zero element in the ith row of matrix A in the val array.",
        "ja: ja[i] indicates the column number of the ith element of the val array in matrix A.",
        "x: Vector x. For c_zcsrtrsv, x is a double-precision complex number array.",
        "y: Vector y. This parameter is output after update. For c_zcsrtrsv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSR格式的稀疏矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse matrix stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, const float beta, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For scsrmv, alpha is of single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For scsrmv, val is a single-precision floating-point array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For scsrmv, x is a single-precision floating-point array.",
        "beta: Coefficient. For scsrmv, beta is of the single-precision floating-point type.",
        "y: Vector y, which is output after being updated. For scsrmv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSR格式的稀疏矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The matrix is a sparse matrix stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *x, const double beta, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For dcsrmv, alpha is of double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For dcsrmv, val is a double-precision floating-point array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For dcsrmv, x is a double-precision floating-point array.",
        "beta: Coefficient. For dcsrmv, beta is of the double-precision floating-point type.",
        "y: Vector y, which is output after being updated. For dcsrmv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSR格式的稀疏矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse matrix stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, const KML_Complex8 beta, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For ccsrmv, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For ccsrmv, val is a single-precision complex number array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For ccsrmv, x is a single-precision complex number array.",
        "beta: Coefficient. For ccsrmv, beta is a single-precision complex number.",
        "y: Vector y, which is output after being updated. For ccsrmv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSR格式的稀疏矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The matrix is a sparse matrix stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, const KML_Complex16 beta, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For zcsrmv, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For zcsrmv, val is a double-precision complex number array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For zcsrmv, x is a double-precision complex number array.",
        "beta: Coefficient. For zcsrmv, beta is a double-precision complex number.",
        "y: Vector y, which is output after being updated. For zcsrmv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSR",
      "desc_en": "single-precision real type, Solves a system of linear equations for a sparse matrix that is stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrsv(const kml_sparse_operation_t opt, const KML_INT m, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For scsrsv, alpha is of single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For scsrsv, val is a single-precision floating-point array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For scsrsv, x is a single-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For scsrsv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSR",
      "desc_en": "double-precision real type, Solves a system of linear equations for a sparse matrix that is stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrsv(const kml_sparse_operation_t opt, const KML_INT m, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *x, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For dcsrsv, alpha is of double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For dcsrsv, val is a double-precision floating-point array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For dcsrsv, x is a double-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For dcsrsv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型求解稀疏线性方程组，稀疏矩阵存储格式为CSR",
      "desc_en": "single-precision complex type, Solves a system of linear equations for a sparse matrix that is stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrsv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For ccsrsv, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For ccsrsv, val is a single-precision complex number array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For ccsrsv, x is a single-precision complex number array.",
        "y: Vector y. This parameter is output after update. For ccsrsv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSR",
      "desc_en": "double-precision complex type, Solves a system of linear equations for a sparse matrix that is stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrsv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For zcsrsv, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For zcsrsv, val is a double-precision complex number array.",
        "indx: indx[i] indicates the column number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Vector x. For zcsrsv, x is a double-precision complex number array.",
        "y: Vector y. This parameter is output after update. For zcsrsv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，计算CSR格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "single-precision real type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *b, const KML_INT ldb, const float beta , float *c , const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix C. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For scsrmm, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For scsrmm, val is a single-precision floating-point array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "b: Array value of matrix B. For scsrmm, b is a single-precision floating-point array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing. Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For scsrmm, beta is of the single-precision floating-point type.",
        "c: Array value of matrix C. For scsrmm, c is a single-precision floating-point array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing. Size of the second dimension of matrix C for zero-based indexing."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，计算CSR格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "double-precision real type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *b, const KML_INT ldb, const double beta , double *c , const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix C. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For dcsrmm, alpha is of the double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For dcsrmm, val is a double-precision floating-point array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "b: Array value of matrix B. For dcsrmm, b is a double-precision floating-point array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing. Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For dcsrmm, beta is of the double-precision floating-point type.",
        "c: Array value of matrix C. For dcsrmm, c is a double-precision floating-point array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing. Size of the second dimension of matrix C for zero-based indexing."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，计算CSR格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "single-precision complex type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *b, const KML_INT ldb, const KML_Complex8 beta , KML_Complex8 *c , const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix C. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For ccsrmm, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For ccsrmm, val is a single-precision complex number array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "b: Array value of matrix B. For ccsrmm, b is a single-precision complex number array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing. Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For ccsrmm, beta is a single-precision complex number.",
        "c: Array value of matrix C. For ccsrmm, c is a single-precision complex number array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing. Size of the second dimension of matrix C for zero-based indexing."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsrmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，计算CSR格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "double-precision complex type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *b, const KML_INT ldb, const KML_Complex16 beta , KML_Complex16 *c , const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix C. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For zcsrmm, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For zcsrmm, val is a double-precision complex number array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "b: Array value of matrix B. For zcsrmm, b is a double-precision complex number array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing. Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For zcsrmm, beta is a double-precision complex number.",
        "c: Array value of matrix C. For zcsrmm, c is a double-precision complex number array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing. Size of the second dimension of matrix C for zero-based indexing."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scscmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与向量乘积，矩阵是CSC格式的稀疏矩阵",
      "desc_en": "single-precision real type, Computes the product of a matrix and a vector. The natrix is a sparse matrix stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scscmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, const float beta, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For scscmv, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For scscmv, val is a single-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For scscmv, x is a single-precision floating-point array.",
        "beta: For scscmv, beta is of the single-precision floating-point type.",
        "y: Vector y. This parameter is output after update. For scscmv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcscmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与向量乘积，矩阵是CSC格式的稀疏矩阵",
      "desc_en": "double-precision real type, Computes the product of a matrix and a vector. The natrix is a sparse matrix stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcscmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *x, const double beta, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient.For dcscmv, alpha is of the double-precision floating-point type..",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For dcscmv, val is a double-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For dcscmv, x is a double-precision floating-point array.",
        "beta: Coefficient. For dcscmv, beta is of the double-precision floating-point type.",
        "y: Vector y. This parameter is output after update. For dcscmv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccscmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与向量乘积，矩阵是CSC格式的稀疏矩阵",
      "desc_en": "single-precision complex type, Computes the product of a matrix and a vector. The natrix is a sparse matrix stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccscmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, const KML_Complex8 beta, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For ccscmv, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For ccscmv, val is a single-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For ccscmv, x is a single-precision complex number array.",
        "beta: Coefficient. For ccscmv, beta is a single-precision complex number.",
        "y: Vector y. This parameter is output after update. For ccscmv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcscmv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与向量乘积，矩阵是CSC格式的稀疏矩阵",
      "desc_en": "double-precision complex type, Computes the product of a matrix and a vector. The natrix is a sparse matrix stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcscmv(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT k, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, const KML_Complex16 beta, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then y = alpha * A * x + beta * y. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then y = alpha * A ^ T * x + beta * y. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then y = alpha * A ^ H * x + beta * y.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For zcscmv, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For zcscmv, val is a double-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For zcscmv, x is a double-precision complex number array.",
        "beta: Coefficient. For zcscmv, beta is a double-precision complex number.",
        "y: Vector y. This parameter is output after update. For zcscmv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scscsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSC",
      "desc_en": "single-precision real type, Solves a system of linear equations for a sparse matrix that is stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scscsv(const kml_sparse_operation_t opt, const KML_INT m, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, const float beta, float *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For scscsv, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For scscsv, val is a single-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For scscsv, x is a single-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For scscsv, y is a single-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcscsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSC",
      "desc_en": "double-precision real type, Solves a system of linear equations for a sparse matrix that is stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcscsv(const kml_sparse_operation_t opt, const KML_INT m, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *x, const double beta, double *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For dcscsv, alpha is of the double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For dcscsv, val is a double-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For dcscsv, x is a double-precision floating-point array.",
        "y: Vector y. This parameter is output after update. For dcscsv, y is a double-precision floating-point array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccscsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSC",
      "desc_en": "single-precision complex type, Solves a system of linear equations for a sparse matrix that is stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccscsv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, const KML_Complex8 beta, KML_Complex8 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For ccscsv, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For ccscsv, val is a single-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For ccscsv, x is a single-precision complex number array.",
        "y: Vector y. This parameter is output after update. For ccscsv, y is a single-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcscsv",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解稀疏线性方程组，稀疏矩阵存储格式为CSC",
      "desc_en": "double-precision complex type, Solves a system of linear equations for a sparse matrix that is stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcscsv(const kml_sparse_operation_t opt, const KML_INT m, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, const KML_Complex16 beta, KML_Complex16 *y)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Operation on matrix A. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Coefficient. For zcscsv, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Non-zero elements in matrix A. For zcscsv, val is a double-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Vector x. For zcscsv, x is a double-precision complex number array.",
        "y: Vector y. This parameter is output after update. For zcscsv, y is a double-precision complex number array."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scscmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，计算CSC格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "single-precision real type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scscmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *b, const KML_INT ldb, const float beta , float *c , const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then C = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For scscmm, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For scscmm, val is a single-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "b: Array value of matrix B. For scscmm, b is a single-precision floating-point array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing, Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For scscmm, beta is of the single-precision floating-point type.",
        "c: Array value of matrix C. For scscmm, c is a single-precision floating-point array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing, Size of the second dimension of matrix C for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcscmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，计算CSC格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "double-precision real type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcscmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *b, const KML_INT ldb, const double beta , double *c , const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then C = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For dcscmm, alpha is of the double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For dcscmm, val is a double-precision floating-point array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "b: Array value of matrix B. For dcscmm, b is a double-precision floating-point array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing, Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For dcscmm, beta is of the double-precision floating-point type.",
        "c: Array value of matrix C. For dcscmm, c is a double-precision floating-point array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing, Size of the second dimension of matrix C for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccscmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，计算CSC格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "single-precision complex type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccscmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *b, const KML_INT ldb, const KML_Complex8 beta , KML_Complex8 *c , const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then C = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For ccscmm, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For ccscmm, val is a single-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "b: Array value of matrix B. For ccscmm, b is a single-precision complex number array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing, Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For ccscmm, beta is a single-precision complex number.",
        "c: Array value of matrix C. For ccscmm, c is a single-precision complex number array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing, Size of the second dimension of matrix C for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcscmm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，计算CSC格式的稀疏矩阵和稠密矩阵的积",
      "desc_en": "double-precision complex type, Computes the product of a sparse matrix and a dense matrix that are stored in the CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcscmm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *b, const KML_INT ldb, const KML_Complex16 beta , KML_Complex16 *c , const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = alpha * A * B + beta * C. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then C = alpha * A ^ T * B + beta * C. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For zcscmm, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For zcscmm, val is a double-precision complex number array.",
        "indx: indx[i] indicates the row number of the ith element of the val array in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "b: Array value of matrix B. For zcscmm, b is a double-precision complex number array.",
        "ldb: Size of the leading dimension of matrix B for one-based indexing, Size of the second dimension of matrix B for zero-based indexing.",
        "beta: Scalar beta. For zcscmm, beta is a double-precision complex number.",
        "c: Array value of matrix C. For zcscmm, c is a double-precision complex number array.",
        "ldc: Size of the leading dimension of matrix C for one-based indexing, Size of the second dimension of matrix C for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解多个右端顶的稀疏线性方程组，矩阵式CSR格式",
      "desc_en": "single-precision real type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, const KML_INT ldx, float *y , const KML_INT ldy)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For scsrsm, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For scsrsm, val is a single-precision floating-point array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Array value of matrix x. For scsrsm, x is a single-precision floating-point array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For scsrsm, y is a single-precision floating-point array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcsrsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解多个右端顶的稀疏线性方程组，矩阵式CSR格式",
      "desc_en": "double-precision real type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre,const double *x, const KML_INT ldx, double *y , const KML_INT ldy)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For dcsrsm, alpha is of the double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For dcsrsm, val is a double-precision floating-point array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Array value of matrix x. For dcsrsm, x is a double-precision floating-point array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For dcsrsm, y is a double-precision floating-point array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccsrsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，求解多个右端顶的稀疏线性方程组，矩阵式CSR格式",
      "desc_en": "single-precision complex type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, const KML_INT ldx, KML_Complex8 *y, const KML_INT ldy);",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For ccsrsm, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For ccsrsm, val is a single-precision complex number array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Array value of matrix x. For ccsrsm, x is a single-precision complex number array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For ccsrsm, y is a single-precision complex number array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解多个右端顶的稀疏线性方程组，矩阵式CSR格式",
      "desc_en": "double-precision complex type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSR format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, const KML_INT ldx, KML_Complex16 *y , const KML_INT ldy)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For zcsrsm, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSR format. The length is pntre[m-1] - pntrb[0]. For zcsrsm, val is a double-precision complex number array.",
        "indx: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing row indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "pntre: Array of length m, containing row indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in row i in the val and indx arrays.",
        "x: Array value of matrix x. For zcsrsm, x is a double-precision complex number array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For zcsrsm, y is a double-precision complex number array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scscsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，求解多个右端顶的稀疏线性方程组，矩阵式CSC格式",
      "desc_en": "single-precision real type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scscsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const float alpha, const char *matdescra, const float *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const float *x, const KML_INT ldx, float *y , const KML_INT ldy)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For scscsm, alpha is of the single-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For scscsm, val is a single-precision floating-point array.",
        "indx: Array columns in the CSC format, which contains the row indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Array value of matrix x. For scscsm, x is a single-precision floating-point array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For scscsm, y is a single-precision floating-point array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcscsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，求解多个右端项的稀疏线性方程组，矩阵是CSC格式",
      "desc_en": "double-precision real type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcscsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const double alpha, const char *matdescra, const double *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const double *x, const KML_INT ldx, double *y, const KML_INT ldy)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For dcscsm, alpha is of the double-precision floating-point type.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For dcscsm, val is a double-precision floating-point array.",
        "indx: Array columns in the CSC format, which contains the row indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Array value of matrix x. For dcscsm, x is a double-precision floating-point array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For dcscsm, y is a double-precision floating-point array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccscsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，求解多个右端项的稀疏线性方程组，矩阵是CSC格式",
      "desc_en": "single-precision complex type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccscsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_Complex8 alpha, const char *matdescra, const KML_Complex8 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex8 *x, const KML_INT ldx, KML_Complex8 *y, const KML_INT ldy)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For ccscsm, alpha is a single-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For ccscsm, val is a single-precision complex number array.",
        "indx: Array columns in the CSC format, which contains the row indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Array value of matrix x. For ccscsm, x is a single-precision complex number array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For ccscsm, y is a single-precision complex number array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcscsm",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，求解多个右端项的稀疏线性方程组，矩阵是CSC格式",
      "desc_en": "double-precision complex type, Solving a sparse system of linear equations with multiple right-hand terms, the matrix is in CSC format",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcscsm(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_Complex16 alpha, const char *matdescra, const KML_Complex16 *val, const KML_INT *indx, const KML_INT *pntrb, const KML_INT *pntre, const KML_Complex16 *x, const KML_INT ldx, KML_Complex16 *y, const KML_INT ldy)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt = KML_SPARSE_OPERATION_NON_TRANSPOSE, then A * y = alpha * x. If opt = KML_SPARSE_OPERATION_TRANSPOSE, then A ^ T * y = alpha * x. If opt = KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then A ^ H * y = alpha * x",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix x. The value range is [1, MAX_KML_INT].",
        "alpha: Scalar alpha. For zcscsm, alpha is a double-precision complex number.",
        "matdescra: Matrix operation attribute. For details, see the description of matdescra.",
        "val: Array values storing non-zero elements of matrix A in the CSC format. The length is pntre[m-1] - pntrb[0]. For zcscsm, val is a double-precision complex number array.",
        "indx: Array columns in the CSC format, which contains the row indices for non-zero elements in matrix A.",
        "pntrb: Array of length m, containing column indices of matrix A. pntrb[i] - pntrb[0] indicates the subscript of the first non-zero element in column i in the val and indx arrays.",
        "pntre: Array of length m, containing column indices of matrix A. pntre[i] - pntrb[0]-1 indicates the subscript of the last non-zero element in column i in the val and indx arrays.",
        "x: Array value of matrix x. For zcscsm, x is a double-precision complex number array.",
        "ldx: Size of the leading dimension of matrix x for one-based indexing, Size of the second dimension of matrix x for zero-based indexing.",
        "y: Array value of matrix y. For zcscsm, y is a double-precision complex number array.",
        "ldy: Size of the leading dimension of matrix y for one-based indexing, Size of the second dimension of matrix y for zero-based indexing"
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsradd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与矩阵相加，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "single-precision real type, Computes the sum of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsradd(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const float *a, const KML_INT *ja, const KML_INT *ia, const float beta, const float *b, const KML_INT *jb, const KML_INT *ib, float *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. If opt is set to KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = A + beta * B. If opt is set to KML_SPARSE_OPERATION_TRANSPOSE, then C = A + beta * B ^ T. If opt is set to KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = A + beta * B ^ H.",
        "request: 0: The function performs addition. The memory for ic, jc, and c must be allocated in advance. 1: The function computes only the values of array ic whose length is m+1. The memory for array ic must be allocated in advance. 2: The function computes only the values of jc and c whose length is ic[m]-1, after the function is called previously with the parameter request=1 and the values of ic are obtained.",
        "sort: 1: The function sorts column indices in ja. 2: The function sorts column indices in jb. 3: The function sorts column indices in ja and jb. Other: The function does not sort column indices in ja or jb. In this case, the column indices in ja and jb have been sorted by default.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A.",
        "a: Array containing non-zero elements of matrix A. For scsradd, a is a single-precision floating-point array.",
        "ja: Array containing column indices in the CSR format.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "beta: Scalar beta. For scsradd, beta is of the single-precision floating-point type.",
        "b: Array containing non-zero elements of matrix B. For scsradd, b is a single-precision floating-point array.",
        "jb: Array containing column indices in the CSR format.",
        "ib: Array containing row indices of sparse matrix B in the CSR format (3-array variation). The array size is m+1 when the matrix is not transposed or n+1 when the matrix is transposed. ib[i] - ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array containing non-zero elements of matrix C. For scsradd, c is a single-precision floating-point array.",
        "jc: Array containing column indices in the CSR format.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: Maximum number of non-zero elements in matrix C. The function stops computing when the number of non-zero elements exceeds the specified value of nzmax. This parameter is valid only when request is set to 0 or 2."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsradd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与矩阵相加，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "double-precision real type, Computes the sum of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsradd(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const double *a, const KML_INT *ja, const KML_INT *ia, const double beta, const double *b, const KML_INT *jb, const KML_INT *ib, double *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt is set to KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = A + beta * B. If opt is set to KML_SPARSE_OPERATION_TRANSPOSE, then C = A + beta * B ^ T. If opt is set to KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = A + beta * B ^ H.",
        "request: 0: The function performs addition. The memory for ic, jc, and c must be allocated in advance. 1: The function computes only the values of array ic whose length is m+1. The memory for array ic must be allocated in advance. 2: The function computes only the values of jc and c whose length is ic[m]-1, after the function is called previously with the parameter request=1 and the values of ic are obtained.",
        "sort: 1: The function sorts column indices in ja. 2: The function sorts column indices in jb. 3: The function sorts column indices in ja and jb. Other: The function does not sort column indices in ja or jb. In this case, the column indices in ja and jb have been sorted by default.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A.",
        "a: Array containing non-zero elements of matrix A. For dcsradd, a is a double-precision floating-point array.",
        "ja: Array containing column indices in the CSR format.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "beta: Scalar beta. For dcsradd, beta is of the double-precision floating-point type.",
        "b: Array containing non-zero elements of matrix B. For dcsradd, b is a double-precision floating-point array.",
        "jb: Array containing column indices in the CSR format.",
        "ib: Array containing row indices of sparse matrix B in the CSR format (3-array variation). The array size is m+1 when the matrix is not transposed or n+1 when the matrix is transposed. ib[i] - ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array containing non-zero elements of matrix C. For dcsradd, c is a double-precision floating-point array.",
        "jc: Array containing column indices in the CSR format.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: Maximum number of non-zero elements in matrix C. The function stops computing when the number of non-zero elements exceeds the specified value of nzmax. This parameter is valid only when request is set to 0 or 2."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsradd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与矩阵相加，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "single-precision complex type, Computes the sum of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsradd(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_Complex8 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex8 beta, const KML_Complex8 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex8 *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt is set to KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = A + beta * B. If opt is set to KML_SPARSE_OPERATION_TRANSPOSE, then C = A + beta * B ^ T. If opt is set to KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = A + beta * B ^ H.",
        "request: 0: The function performs addition. The memory for ic, jc, and c must be allocated in advance. 1: The function computes only the values of array ic whose length is m+1. The memory for array ic must be allocated in advance. 2: The function computes only the values of jc and c whose length is ic[m]-1, after the function is called previously with the parameter request=1 and the values of ic are obtained.",
        "sort: 1: The function sorts column indices in ja. 2: The function sorts column indices in jb. 3: The function sorts column indices in ja and jb. Other: The function does not sort column indices in ja or jb. In this case, the column indices in ja and jb have been sorted by default.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A.",
        "a: Array containing non-zero elements of matrix A. For ccsradd, a is a single-precision complex number array.",
        "ja: Array containing column indices in the CSR format.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "beta: Scalar beta. For ccsradd, beta is a single-precision complex number.",
        "b: Array containing non-zero elements of matrix B. For ccsradd, b is a single-precision complex number array.",
        "jb: Array containing column indices in the CSR format.",
        "ib: Array containing row indices of sparse matrix B in the CSR format (3-array variation). The array size is m+1 when the matrix is not transposed or n+1 when the matrix is transposed. ib[i] - ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array containing non-zero elements of matrix C. For ccsradd, c is a single-precision complex number array.",
        "jc: Array containing column indices in the CSR format.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: Maximum number of non-zero elements in matrix C. The function stops computing when the number of non-zero elements exceeds the specified value of nzmax. This parameter is valid only when request is set to 0 or 2."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_zcsradd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与矩阵相加，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "double-precision complex type, Computes the sum of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsradd(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_Complex16 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex16 beta, const KML_Complex16 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex16 *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. If opt is set to KML_SPARSE_OPERATION_NON_TRANSPOSE, then C = A + beta * B. If opt is set to KML_SPARSE_OPERATION_TRANSPOSE, then C = A + beta * B ^ T. If opt is set to KML_SPARSE_OPERATION_CONJUGATE_TRANSPOSE, then C = A + beta * B ^ H.",
        "request: 0: The function performs addition. The memory for ic, jc, and c must be allocated in advance. 1: The function computes only the values of array ic whose length is m+1. The memory for array ic must be allocated in advance. 2: The function computes only the values of jc and c whose length is ic[m]-1, after the function is called previously with the parameter request=1 and the values of ic are obtained.",
        "sort: 1: The function sorts column indices in ja. 2: The function sorts column indices in jb. 3: The function sorts column indices in ja and jb. Other: The function does not sort column indices in ja or jb. In this case, the column indices in ja and jb have been sorted by default.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A.",
        "a: Array containing non-zero elements of matrix A. For zcsradd, a is a double-precision complex number array.",
        "ja: Array containing column indices in the CSR format.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "beta: Scalar beta. For zcsradd, beta is a double-precision complex number.",
        "b: Array containing non-zero elements of matrix B. For zcsradd, b is a double-precision complex number array.",
        "jb: Array containing column indices in the CSR format.",
        "ib: Array containing row indices of sparse matrix B in the CSR format (3-array variation). The array size is m+1 when the matrix is not transposed or n+1 when the matrix is transposed. ib[i] - ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array containing non-zero elements of matrix C. For zcsradd, c is a double-precision complex number array.",
        "jc: Array containing column indices in the CSR format.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: Maximum number of non-zero elements in matrix C. The function stops computing when the number of non-zero elements exceeds the specified value of nzmax. This parameter is valid only when request is set to 0 or 2."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrmultcsr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "single-precision real type, Computes the product of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrmultcsr(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_INT k, const float *a, const KML_INT *ja, const KML_INT *ia, const float *b, const KML_INT *jb, const KML_INT *ib, float *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0: C = alpha * A * B + beta * C. opt = 1: C = alpha * A ^ T * B + beta * C. opt = 2: C = alpha * A ^ H * B + beta * C.",
        "request: request = 0: The memory for ic, jc, and c is allocated by the caller. request = 1: Only ic is updated. The memory for ic is allocated by the caller. request = 2: The function has been called previously with request=1 and jc and c are updated.",
        "sort: The sort parameter is not defined.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For scsrmultcsr, a is a single-precision floating-point array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For scsrmultcsr, b is a single-precision floating-point array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array of length m+1, containing row indices of matrix B. ib[i] - ib[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "c: Array values storing non-zero elements of matrix C in the CSR format. For scsrmultcsr, c is a single-precision floating-point array.",
        "jc: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix C.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: This parameter is used if the value of request is 0 or 2. The function stops computing if the memory required by C exceeds the specified value of nzmax."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_dcsrmultcsr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "double-precision real type, Computes the product of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrmultcsr(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_INT k, const double *a, const KML_INT *ja, const KML_INT *ia, const double *b, const KML_INT *jb, const KML_INT *ib, double *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0: C = alpha * A * B + beta * C. opt = 1: C = alpha * A ^ T * B + beta * C. opt = 2: C = alpha * A ^ H * B + beta * C.",
        "request: request = 0: The memory for ic, jc, and c is allocated by the caller. request = 1: Only ic is updated. The memory for ic is allocated by the caller. request = 2: The function has been called previously with request=1 and jc and c are updated.",
        "sort: The sort parameter is not defined.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For dcsrmultcsr, a is a double-precision floating-point array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For dcsrmultcsr, b is a double-precision floating-point array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array of length m+1, containing row indices of matrix B. ib[i] - ib[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "c: Array values storing non-zero elements of matrix C in the CSR format. For dcsrmultcsr, c is a double-precision floating-point array.",
        "jc: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix C.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] - ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: This parameter is used if the value of request is 0 or 2. The function stops computing if the memory required by C exceeds the specified value of nzmax."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_ccsrmultcsr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "single-precision complex type, Computes the product of two sparse matrices that are stored in the CSR format (3-array variation) with one-based indexing. The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrmultcsr(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex8 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex8 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex8 *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0: C = alpha * A * B + beta * C. opt = 1: C = alpha * A ^ T * B + beta * C. opt = 2: C = alpha * A ^ H * B + beta * C.",
        "request: request = 0: The memory for ic, jc, and c is allocated by the caller. request = 1: Only ic is updated. The memory for ic is allocated by the caller. request = 2: The function has been called previously with request=1 and jc and c are updated.",
        "sort: The sort parameter is not defined.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For ccsrmultcsr, a is a single-precision complex number array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For ccsrmultcsr, b is a single-precision complex number array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array of length m+1, containing row indices of matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "c: Array values storing non-zero elements of matrix C in the CSR format. For ccsrmultcsr, c is a single-precision complex number array.",
        "jc: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix C.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] – ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: This parameter is used if the value of request is 0 or 2. The function stops computing if the memory required by C exceeds the specified value of nzmax."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrmultcsr",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稀疏矩阵",
      "desc_en": "double-precision complex type, Computes the product of two sparse matrices that are stored in the CSR format(3-array variation)with one-based indexing.The result is stored in a sparse matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrmultcsr(const kml_sparse_operation_t opt, const KML_INT request, const KML_INT sort, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex16 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex16 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex16 *c, KML_INT *jc, KML_INT *ic, const KML_INT nzmax)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0: C = alpha * A * B + beta * Copt = 1: C = alpha * A ^ T * B + beta * Copt = 2: C = alpha * A ^ H * B + beta * C.",
        "request: request = 0: The memory for ic, jc, and c is allocated by the caller.request = 1: Only ic is updated. The memory for ic is allocated by the caller.request = 2: The function has been called previously with request=1 and jc and c are updated.",
        "sort: The sort parameter is not defined.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For zcsrmultcsr, a is a double-precision complex number array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For zcsrmultcsr, b is a double-precision complex number array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array of length m+1, containing row indices of matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "c: Array values storing non-zero elements of matrix C in the CSR format. For zcsrmultcsr, c is a double-precision complex number array.",
        "jc: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix C.",
        "ic: Array of length m+1, containing row indices of matrix C. ic[i] – ic[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "nzmax: This parameter is used if the value of request is 0 or 2. The function stops computing if the memory required by C exceeds the specified value of nzmax."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_sparse_scsrmultd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度实数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稠密矩阵",
      "desc_en": "single-precision real type, Computes the product of two sparse matrices that are stored in the CSR format(3-array variation)with one-based indexing.The result is stored in a dense matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_scsrmultd(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const float *a, const KML_INT *ja, const KML_INT *ia, const float *b, const KML_INT *jb, const KML_INT *ib, float *c, const KML_INT ldc)",
      "headerfile_desc": "kspblas.h",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0, C = alpha * A * B + beta * C.opt = 1: C = alpha * A ^ T * B + beta * C.opt = 2: C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For scsrmultd, a is a single-precision floating-point array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For scsrmultd, b is a single-precision floating-point array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array containing row indices of elements in matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array of matrix C. For scsrmultd, c is a single-precision floating-point array.",
        "ldc: Leading dimension of dense matrix C. ldc*k <= MAX_KML_INT."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_dcsrmultd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度实数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稠密矩阵",
      "desc_en": "double-precision real type, Computes the product of two sparse matrices that are stored in the CSR format(3-array variation)with one-based indexing.The result is stored in a dense matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_dcsrmultd(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const double *a, const KML_INT *ja, const KML_INT *ia, const double *b, const KML_INT *jb, const KML_INT *ib, double *c, const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0, C = alpha * A * B + beta * C.opt = 1: C = alpha * A ^ T * B + beta * C.opt = 2: C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For dcsrmultd, a is a double-precision floating-point array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For dcsrmultd, b is a double-precision floating-point array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array containing row indices of elements in matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array of matrix C. For dcsrmultd, c is a double-precision floating-point array.",
        "ldc: Leading dimension of dense matrix C. ldc*k <= MAX_KML_INT."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_ccsrmultd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "单精度复数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稠密矩阵",
      "desc_en": "single-precision complex type, Computes the product of two sparse matrices that are stored in the CSR format(3-array variation)with one-based indexing.The result is stored in a dense matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_ccsrmultd(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex8 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex8 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex8 *c, const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0, C = alpha * A * B + beta * C.opt = 1: C = alpha * A ^ T * B + beta * C.opt = 2: C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For ccsrmultd, a is a single-precision complex number array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For ccsrmultd, b is a single-precision complex number array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array containing row indices of elements in matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array of matrix C. For ccsrmultd, c is a single-precision complex number array.",
        "ldc: Leading dimension of dense matrix C. ldc*k <= MAX_KML_INT."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_csparse_zcsrmultd",
      "library": "KML_SPBLAS",
      "headerfile": "",
      "desc_cn": "双精度复数类型，矩阵与矩阵相乘，矩阵是CSR格式(三数组，矩阵index从1开始)的稀疏矩阵，结果存储在稠密矩阵",
      "desc_en": "double-precision complex type, Computes the product of two sparse matrices that are stored in the CSR format(3-array variation)with one-based indexing.The result is stored in a dense matrix",
      "benefit_cn": "基于鲲鹏架构为压缩格式的稀疏矩阵提供高性能向量、矩阵运算",
      "benefit_en": "Based on the Kunpeng architecture provides high-performance vector and matrix operations for sparse matrices in compressed formats",
      "func_name": "kml_sparse_status_t kml_sparse_zcsrmultd(const kml_sparse_operation_t opt, const KML_INT m, const KML_INT n, const KML_INT k, const KML_Complex16 *a, const KML_INT *ja, const KML_INT *ia, const KML_Complex16 *b, const KML_INT *jb, const KML_INT *ib, KML_Complex16 *c, const KML_INT ldc)",
      "parameters": [
        "opt: Indicates whether to transpose. opt = 0, C = alpha * A * B + beta * C.opt = 1: C = alpha * A ^ T * B + beta * C.opt = 2: C = alpha * A ^ H * B + beta * C.",
        "m: Number of rows in matrix A. The value range is [1, MAX_KML_INT].",
        "n: Number of columns in matrix A. The value range is [1, MAX_KML_INT].",
        "k: Number of columns in matrix B. The value range is [1, MAX_KML_INT].",
        "a: Array values storing non-zero elements of matrix A in the CSR format. For zcsrmultd, a is a double-precision complex number array.",
        "ja: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix A.",
        "ia: Array of length m+1, containing row indices of matrix A. ia[i] - ia[0] indicates the subscript of the first non-zero element in row i in the val and indx arrays.",
        "b: Array values storing non-zero elements of matrix B in the CSR format. For zcsrmultd, b is a double-precision complex number array.",
        "jb: Array columns in the CSR format, which contains the column indices for non-zero elements in matrix B.",
        "ib: Array containing row indices of elements in matrix B. ib[i] – ib[0] indicates the subscript of the first non-zero element in the ith row in the val and indx arrays.",
        "c: Array of matrix C. For zcsrmultd, c is a double-precision complex number array.",
        "ldc: Leading dimension of dense matrix C. ldc*k <= MAX_KML_INT."
      ],
      "return": "Function execution status. The enumeration type is kml_sparse_status_t."
    },
    {
      "name": "kml_fft_plan_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列n维C2C变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft(int rank, const int *n, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned flags)",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 ≤ rank ≤ 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] ≥ 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列n维C2C变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft(int rank, const int *n, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 ≤ rank ≤ 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] ≥ 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列1维C2C变换的plan",
      "desc_en": "double-precision type, create a plan for the one-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_1d(int n, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n ≥ 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列1维C2C变换的plan",
      "desc_en": "single-precision type, create a plan for the one-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_1d(int n, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n ≥ 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列2维C2C变换的plan",
      "desc_en": "double-precision type, create a plan for the two-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_2d(int n0, int n1, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 ≥ 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 ≥ 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列2维C2C变换的plan",
      "desc_en": "single-precision type, create a plan for the two-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_2d(int n0, int n1, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 ≥ 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 ≥ 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列3维C2C变换的plan",
      "desc_en": "double-precision type, create a plan for the three-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_3d(int n0, int n1, int n2, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 ≥ 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 ≥ 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2≥ 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列3维C2C变换的plan",
      "desc_en": "single-precision type, create a plan for the three-dimensional complex-to-complex(C2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_3d(int n0, int n1, int n2, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 ≥ 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 ≥ 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2≥ 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_guru_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_dft(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. The constraint is 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_guru_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_dft(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. The constraint is 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_guru_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_split_dft(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, double *ri, double *ii, double *ro, double *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "ri: Inputs the real part of the data to be transformed. Double precision: double*",
        "ii: Inputs the imaginary part of the data to be transformed. Double precision: double*",
        "ro: Outputs the real part of the data to be transformed. Double precision: double*",
        "io: Outputs the imaginary part of the data to be transformed. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft function as a parameter to perform FFT on the new input (ri, ii) and output (ro, io)."
    },
    {
      "name": "kml_fftf_plan_guru_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_split_dft(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, float *ri, float *ii, float *ro, float *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "ri: Inputs the real part of the data to be transformed. Single precision: float*",
        "ii: Inputs the imaginary part of the data to be transformed. Single precision: float*",
        "ro: Outputs the real part of the data to be transformed. Single precision: float*",
        "io: Outputs the imaginary part of the data to be transformed. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft function as a parameter to perform FFT on the new input (ri, ii) and output (ro, io)."
    },
    {
      "name": "kml_fft_plan_guru64_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_dft(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, kml_fft_complex *in, kml_fft_complex *out, int sign, unsigned",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. ptrdiff_t n: FFT length of the i-th dimension. ptrfiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrfiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrfiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrfiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64*",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_guru64_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_dft(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, kml_fftf_complex *in, kml_fftf_complex *out, int sign, unsigned",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. ptrdiff_t n: FFT length of the i-th dimension. ptrfiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrfiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrfiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrfiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64*",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_guru64_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_split_dft(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, double *ri, double *ii, double *ro, double *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. ptrdiff_t n: FFT length of the i-th dimension. ptrfiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrfiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrfiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrfiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64*",
        "ri: Inputs the real part of the data to be transformed. Double precision: double*",
        "ii: Inputs the imaginary part of the data to be transformed. Double precision: double*",
        "ro: Outputs the real part of the data to be transformed. Double precision: double*",
        "io: Outputs the imaginary part of the data to be transformed. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft function as a parameter to perform FFT on the new input (ri, ii) and output (ro, io)."
    },
    {
      "name": "kml_fftf_plan_guru64_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of a multiple data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_split_dft(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, float *ri, float *ii, float *ro, float *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 ≤ rank ≤ 3.",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members:. ptrdiff_t n: FFT length of the i-th dimension. ptrfiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrfiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n ≥ 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 ≤ howmany_rank ≤ 3.",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members:. ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrfiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrfiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64*",
        "ri: Inputs the real part of the data to be transformed. Single precision: float*",
        "ii: Inputs the imaginary part of the data to be transformed. Single precision: float*",
        "ro: Outputs the real part of the data to be transformed. Single precision: float*",
        "io: Outputs the imaginary part of the data to be transformed. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft function as a parameter to perform FFT on the new input (ri, ii) and output (ro, io)."
    },
    {
      "name": "kml_fft_plan_many_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立howmany组数据序列n维C2C变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_many_dft(int rank, const int *n, int howmany, kml_fft_complex *in, const int *inembed, int istride, int idist, kml_fft_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 ≤ rank ≤ 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] ≥ 1, for i in 0, rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] ≥ n[i] for i in 0, rank-1 . Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] ≥ n[i] for i in 0, rank-1. Or if onembed = NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_many_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立howmany组数据序列n维C2C变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-complex(C2C)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_many_dft(int rank, const int *n, int howmany, kml_fftf_complex *in, const int *inembed, int istride, int idist, kml_fftf_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 ≤ rank ≤ 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] ≥ 1, for i in 0, rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] ≥ n[i] for i in 0, rank-1 . Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] ≥ n[i] for i in 0, rank-1. Or if onembed = NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "sign: Specifies forward or backward transform. -1(KML_FFT_FORWARD): forward, +1(KML_FFT_BACKWARD): backward.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列n维R2C变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_r2c(int rank, const int *n, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列n维R2C变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_r2c(int rank, const int *n, float *in, kml_fftf_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_r2c_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列1维R2C变换的plan",
      "desc_en": "double-precision type, create a plan for the one-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_r2c_1d(int n, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fftf_plan_dft_r2c_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列1维R2C变换的plan",
      "desc_en": "single-precision type, create a plan for the one-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_r2c_1d(int n, float *in, kml_fftf_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output."
    },
    {
      "name": "kml_fft_plan_dft_r2c_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列2维R2C变换的plan",
      "desc_en": "double-precision type, create a plan for the two-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_r2c_2d(int n0, int n1, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output.If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_r2c_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列2维R2C变换的plan",
      "desc_en": "single-precision type, create a plan for the two-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_r2c_2d(int n0, int n1, float *in, kml_fftf_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output.If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_dft_r2c_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列3维R2C变换的plan",
      "desc_en": "double-precision type, create a plan for the three-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_r2c_3d(int n0, int n1, int n2, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_r2c_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列3维R2C变换的plan",
      "desc_en": "single-precision type, create a plan for the three-dimensional real-to-complex(R2C)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_r2c_3d(int n0, int n1, int n2, double *in, kml_fft_complex *out, unsigned flags)",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_dft_r2c(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_dft_r2c(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, float *in, kml_fftf_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_split_dft_r2c(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, double *in, double *ro, double *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "in: Inputs the data to be transformed. Double precision: double*",
        "ro: Outputs the real part of the data to be transformed. Double precision: double*",
        "io: Outputs the imaginary part of the data to be transformed. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (in) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_r2c function as a parameter to perform FFT on the new input (in) and output (ro, io). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_split_dft_r2c(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, float *in, float *ro, float *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "in: Inputs the data to be transformed. Single precision: float*",
        "ro: Outputs the real part of the data to be transformed. Single precision: float*",
        "io: Outputs the imaginary part of the data to be transformed. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (in) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_r2c function as a parameter to perform FFT on the new input (in) and output (ro, io). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru64_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_dft_r2c(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, double *in, kml_fft_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64*",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru64_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_dft_r2c(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, float *in, kml_fftf_complex *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64*",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru64_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan_guru64_split_dft_r2c(rank, dims, howmany_rank, howmany_dims, in, ro, io, KML_FFT_ESTIMATE)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fft_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fft_iodim64 *",
        "in: Inputs the data to be transformed. Double precision: double*",
        "ro: Outputs the real part of the data to be transformed. Double precision: double*",
        "io: Outputs the imaginary part of the data to be transformed. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (in) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_r2c function as a parameter to perform FFT on the new input (in) and output (ro, io). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru64_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2C变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of a multiple data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_split_dft_r2c(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, float *in, float *ro, float *io, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fftf_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fftf_iodim64 *",
        "in: Inputs the data to be transformed. Single precision: float*",
        "ro: Outputs the real part of the data to be transformed. Single precision: float*",
        "io: Outputs the imaginary part of the data to be transformed. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (in) and output (ro, io). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_r2c function as a parameter to perform FFT on the new input (in) and output (ro, io). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_many_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立howmany组数据序列n维R2C变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_many_dft_r2c(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, kml_fft_complex *out, const int *onembed, int ostride, int odist, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i] for i in 0, rank-1. Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i] for i in 0, rank-1 . Or if onembed == NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_many_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立howmany组数据序列n维R2C变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-complex(R2C)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_many_dft_r2c(int rank, const int *n, int howmany, float *in, const int *inembed, int istride, int idist, kml_fftf_complex *out, const int *onembed, int ostride, int odist, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i] for i in 0 to rank - 1. Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i] for i in 0 to rank - 1. Or if onembed == NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_r2c function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列n维C2R变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_c2r(int rank, const int *n, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列n维C2R变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_c2r(int rank, const int *n, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_dft_c2r_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列1维C2R变换的plan",
      "desc_en": "double-precision type, create a plan for the one-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_c2r_1d(int n, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_c2r_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列1维C2R变换的plan",
      "desc_en": "single-precision type, create a plan for the one-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_c2r_1d(int n, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_dft_c2r_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列2维C2R变换的plan",
      "desc_en": "double-precision type, create a plan for the two-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_c2r_2d(int n0, int n1, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_c2r_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列2维C2R变换的plan",
      "desc_en": "single-precision type, create a plan for the two-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_c2r_2d(int n0, int n1, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_dft_c2r_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列3维C2R变换的plan",
      "desc_en": "double-precision type, create a plan for the three-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_dft_c2r_3d(int n0, int n1, int n2, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_dft_c2r_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列3维C2R变换的plan",
      "desc_en": "single-precision type, create a plan for the three-dimensional complex-to-real(C2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_dft_c2r_3d(int n0, int n1, int n2, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_dft_c2r(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_dft_c2r(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_split_dft_c2r(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, double *ri, double *ii, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim*",
        "ri: Inputs the real part of the data to be transformed. Double precision: double*",
        "ii: Inputs the imaginary part of the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (out). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_c2r function as a parameter to perform FFT on the new input (ri, ii) and output (out). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_split_dft_c2r(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, float *ri, float *ii, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim*",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim*",
        "ri: Inputs the real part of the data to be transformed. Single precision: float*",
        "ii: Inputs the imaginary part of the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (out). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_c2r function as a parameter to perform FFT on the new input (ri, ii) and output (out). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru64_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_dft_c2r(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, kml_fft_complex *in, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64 *",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru64_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_dft_c2r(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, kml_fftf_complex *in, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64 *",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru64_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_split_dft_c2r(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, double *ri, double *ii, double *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64 *",
        "ri: Inputs the real part of the data to be transformed. Double precision: double*",
        "ii: Inputs the imaginary part of the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (out). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_c2r function as a parameter to perform FFT on the new input (ri, ii) and output (out). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru64_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维C2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of multiple data sequences",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_split_dft_c2r(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, float *ri, float *ii, float *out, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64 *",
        "ri: Inputs the real part of the data to be transformed. Single precision: float*",
        "ii: Inputs the imaginary part of the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input (ri, ii) and output (out). In addition, the object may also be added into the kml_fft(f)_execute_split_dft_c2r function as a parameter to perform FFT on the new input (ri, ii) and output (out). If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_many_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立howmany组数据序列n维C2R变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of howmany data sequences",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_many_dft_c2r(int rank, const int *n, int howmany, kml_fft_complex *in, const int *inembed, int istride, int idist, double *out, const int *onembed, int ostride, int odist, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i], for i in 0 to rank - 1. If inembed == NULL, inembed is treated as equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i], for i in 0 to rank - 1. If onembed == NULL, onembed is treated as equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_many_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立howmany组数据序列n维C2R变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional complex-to-real(C2R)transform of howmany data sequences",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_many_dft_c2r(int rank, const int *n, int howmany, kml_fftf_complex *in, const int *inembed, int istride, int idist, float *out, const int *onembed, int ostride, int odist, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i], for i in 0 to rank - 1. If inembed == NULL, inembed is treated as equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i], for i in 0 to rank - 1. If onembed == NULL, onembed is treated as equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_dft_c2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列n维R2R变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_r2r(int rank, const int *n, double *in, double *out, const kml_fft_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: const kml_fft_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列n维R2R变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_r2r(int rank, const int *n, float *in, float *out, const kml_fftf_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: const kml_fftf_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_r2r_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列1维R2R变换的plan",
      "desc_en": "double-precision type, create a plan for the one-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_r2r_1d(int n, double *in, double *out, kml_fft_r2r_kind kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "kind: kind indicates the R2R transform type of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_r2r_1d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列1维R2R变换的plan",
      "desc_en": "single-precision type, create a plan for the one-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_r2r_1d(int n, float *in, float *out, kml_fftf_r2r_kind kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Inputs the FFT sequence size. The constraint is n >= 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "kind: kind indicates the R2R transform type of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_r2r_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列2维R2R变换的plan",
      "desc_en": "double-precision type, create a plan for the two-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_r2r_2d(int n0, int n1, double *in, double *out, kml_fft_r2r_kind kind0, kml_fft_r2r_kind kind1, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "kind0: kind0 indicates the R2R transform type in the first dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "kind1: kind1 indicates the R2R transform type in the second dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_r2r_2d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列2维R2R变换的plan",
      "desc_en": "single-precision type, create a plan for the two-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_r2r_2d(int n0, int n1, float *in, float *out, kml_fftf_r2r_kind kind0, kml_fftf_r2r_kind kind1, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "kind0: kind0 indicates the R2R transform type in the first dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "kind1: kind1 indicates the R2R transform type in the second dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_r2r_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立单个连续数据序列3维R2R变换的plan",
      "desc_en": "double-precision type, create a plan for the three-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_r2r_3d(int n0, int n1, int n2, double *in, double *out, kml_fft_r2r_kind kind0, kml_fft_r2r_kind kind1, kml_fft_r2r_kind kind2, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed. Double precision: double*",
        "out: Outputs the data generated using FFT. Double precision: double*",
        "kind0: kind0 indicates the R2R transform type in the first dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "kind1: kind1 indicates the R2R transform type in the second dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "kind2: kind2 indicates the R2R transform type in the third dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: kml_fft_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_r2r_3d",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立单个连续数据序列3维R2R变换的plan",
      "desc_en": "single-precision type, create a plan for the three-dimensional real-to-real(R2R)transform of a single contiguous data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_r2r_3d(int n0, int n1, int n2, float *in, float *out, kml_fftf_r2r_kind kind0, kml_fftf_r2r_kind kind1, kml_fftf_r2r_kind kind2, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n0: Inputs the size of the first dimension in the FFT sequence. The constraint is n0 >= 1.",
        "n1: Inputs the size of the second dimension in the FFT sequence. The constraint is n1 >= 1.",
        "n2: Inputs the size of the third dimension in the FFT sequence. The constraint is n2 >= 1.",
        "in: Inputs the data to be transformed. Single precision: float*",
        "out: Outputs the data generated using FFT. Single precision: float*",
        "kind0: kind0 indicates the R2R transform type in the first dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "kind1: kind1 indicates the R2R transform type in the second dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "kind2: kind2 indicates the R2R transform type in the third dimension of an FFT sequence. It has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: kml_fftf_r2r_kind",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of multiple data sequences.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru_dft_r2r(int rank, const kml_fft_iodim *dims, int howmany_rank, const kml_fft_iodim *howmany_dims, double *in, double *out, const kml_fft_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru_dft_r2r(int rank, const kml_fftf_iodim *dims, int howmany_rank, const kml_fftf_iodim *howmany_dims, float *in, float *out, const kml_fftf_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: int n: FFT length of the i-th dimension. int is: interval between successive elements of the i-th dimensional FFT input sequence. int os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: int n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. int is: interval between successive FFT input sequences of the i-th dimension. int os: interval between successive FFT output sequences of the i-th dimension.",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_guru64_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立多组数据序列n维R2R变换的plan。",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_guru64_dft_r2r(int rank, const kml_fft_iodim64 *dims, int howmany_rank, const kml_fft_iodim64 *howmany_dims, double *in, double *out, const kml_fft_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Double precision: const kml_fft_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Double precision: const kml_fft_iodim64 *",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: const kml_fft_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_guru64_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立多组数据序列n维R2R变换的plan。",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of a multiple data sequence.",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fftf_plan kml_fftf_plan_guru64_dft_r2r(int rank, const kml_fftf_iodim64 *dims, int howmany_rank, const kml_fftf_iodim64 *howmany_dims, float *in, float *out, const kml_fftf_r2r_kind *kind, unsigned flags)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "rank: Dimension of a single FFT sequence. Constraint: 1 <= rank <= 3",
        "dims: dims is a structure array whose size is rank. dims[i] contains the following members: ptrdiff_t n: FFT length of the i-th dimension. ptrdiff_t is: interval between successive elements of the i-th dimensional FFT input sequence. ptrdiff_t os: interval between successive elements of the i-th dimensional FFT output sequence. Constraint: dims[i].n >= 1, for i in 0 to rank - 1. Single precision: const kml_fftf_iodim64 *",
        "howmany_rank: The memory allocation between multiple rank-dimension FFTs is described by the howmany_dims array of the howmany_rank dimension. howmany_rank indicates the number of dimensions required by the memory access mode of the start address of each rank-dimension FFT to be calculated. Constraint: 0 <= howmany_rank <= 3",
        "howmany_dims: howmany_dims is a structure array whose size is howmany_rank. howmany_dims[i] contains the following members: ptrdiff_t n: number of pending FFTs in the i-th dimension of the howmany_rank dimension space. ptrdiff_t is: interval between successive FFT input sequences of the i-th dimension. ptrdiff_t os: interval between successive FFT output sequences of the i-th dimension. Single precision: const kml_fftf_iodim64 *",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: const kml_fftf_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fft(f)_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_plan_many_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,建立howmany组数据序列n维R2R变换的plan",
      "desc_en": "double-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_many_r2r(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, double *out, const int *onembed, int ostride, int odist, const kml_fft_r2r_kind *kind, unsigned flags)",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed.",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i] for i in 0, rank-1. Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT.",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i] for i in 0, rank-1. Or if onembed == NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Double precision: const kml_fft_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fftf_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fftf_plan_many_dft_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,建立howmany组数据序列n维R2R变换的plan",
      "desc_en": "single-precision type, create a plan for the n-dimensional real-to-real(R2R)transform of howmany data sequence",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "kml_fft_plan kml_fft_plan_many_r2r(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, double *out, const int *onembed, int ostride, int odist, const kml_fft_r2r_kind *kind, unsigned flags)",
      "parameters": [
        "rank: Dimension of FFT. The constraint is 1 <= rank <= 3.",
        "n: Indicates an array whose dimension is rank, including the size of each dimension in the FFT sequence. The constraint is n[i] >= 1, for i in 0 to rank - 1.",
        "howmany: howmany indicates how many multi-dimensional FFTs are needed.",
        "in: Inputs the data to be transformed.",
        "inembed: inembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for input FFT data storage. Constraint: inembed[i] >= n[i] for i in 0, rank-1. Or if inembed == NULL, inembed is equal to n.",
        "istride: Interval between successive elements of the i-th dimensional FFT input sequence.",
        "idist: idist indicates the interval between FFT input sequences.",
        "out: Outputs the data generated using FFT.",
        "onembed: onembed is an array whose size is rank or NULL. This array indicates the size of each dimension of a larger space for output FFT data storage. Constraint: onembed[i] >= n[i] for i in 0, rank-1. Or if onembed == NULL, onembed is equal to n.",
        "ostride: Interval between successive elements of the i-th dimensional FFT output sequence.",
        "odist: odist indicates the interval between FFT output sequences.",
        "kind: kind is an array whose size is rank, including the R2R transform type in each dimension of an FFT sequence. kind[i] (for i in 0 to rank - 1) has the following options: KML_FFT_R2HC KML_FFT_HC2R KML_FFT_DHT KML_FFT_REDFT00 KML_FFT_REDFT01 KML_FFT_REDFT10 KML_FFT_REDFT11 KML_FFT_RODFT00 KML_FFT_RODFT01 KML_FFT_RODFT10 KML_FFT_RODFT11. Single precision: const kml_fftf_r2r_kind*",
        "flags: A planning option, not in use."
      ],
      "return": "The function returns a structure pointer of the kml_fftf_plan type. This object is used as a parameter in the kml_fft(f)_execute function to perform FFT on the current input and output. In addition, the object may also be added into the kml_fft(f)_execute_r2r function as a parameter to perform FFT on the new input and output. If this function returns a non-null pointer, the plan has been successfully executed. Otherwise, the plan failed to be executed."
    },
    {
      "name": "kml_fft_execute",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute(const kml_fft_plan p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan"
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute(const kml_fftf_plan p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan"
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_dft(const kml_fft_plan p, kml_fft_complex *in, kml_fft_complex *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT. Double precision: kml_fft_complex*"
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_dft(const kml_fftf_plan p, kml_fftf_complex *in, kml_fftf_complex *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*"
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_dft_r2c(const kml_fft_plan p, double *in, kml_fft_complex *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Single precision: const kml_fftf_plan"
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_dft_r2c(const kml_fftf_plan p, float *in, kml_fftf_complex *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: kml_fft_complex*",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT. Single precision: kml_fftf_complex*"
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_dft_c2r(const kml_fft_plan p, kml_fft_complex *in, double *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "in: Inputs the data to be transformed. Double precision: kml_fft_complex*",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_dft_c2r(const kml_fftf_plan p, kml_fftf_complex *in, float *out);",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "in: Inputs the data to be transformed. Single precision: kml_fftf_complex*",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_r2r(const kml_fft_plan p, double *in, double *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_r2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_r2r(const kml_fftf_plan p, float *in, float *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "in: Inputs the data to be transformed.",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_split_dft(const kml_fft_plan p, double *ri, double *ii, double *ro, double *io)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "ri: Inputs the real part of the data to be transformed.",
        "ii: Inputs the imaginary part of the data to be transformed.",
        "ro: Outputs the real part of the data to be transformed.",
        "io: Outputs the imaginary part of the data to be transformed."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_split_dft",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_split_dft(const kml_fftf_plan p, float *ri, float *ii, float *ro, float *io)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "ri: Inputs the real part of the data to be transformed.",
        "ii: Inputs the imaginary part of the data to be transformed.",
        "ro: Outputs the real part of the data to be transformed.",
        "io: Outputs the imaginary part of the data to be transformed."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_split_dft_r2c(const kml_fft_plan p, double *in, double *ro, double *io)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "in: Inputs the data to be transformed.",
        "ro: Outputs the real part of the data to be transformed.",
        "io: Outputs the imaginary part of the data to be transformed."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_split_dft_r2c",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_split_dft_r2c(const kml_fftf_plan p, float *in, float *ro, float *io)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "in: Inputs the data to be transformed.",
        "ro: Outputs the real part of the data to be transformed.",
        "io: Outputs the imaginary part of the data to be transformed."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_execute_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,执行之前建立的FFT变换plan",
      "desc_en": "double-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_execute_split_dft_c2r(const kml_fft_plan p, double *ri, double *ii, double *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Double precision: const kml_fft_plan",
        "ri: Inputs the real part of the data to be transformed.",
        "ii: Inputs the imaginary part of the data to be transformed.",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_execute_split_dft_c2r",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,执行之前建立的FFT变换plan",
      "desc_en": "single-precision type, executes the created FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_execute_split_dft_c2r(const kml_fftf_plan p, float *ri, float *ii, float *out)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan obtained by calling the plan function. Single precision: const kml_fftf_plan",
        "ri: Inputs the real part of the data to be transformed.",
        "ii: Inputs the imaginary part of the data to be transformed.",
        "out: Outputs the data generated using FFT."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_malloc",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型,分配所需内存空间",
      "desc_en": "double-precision type, allocate the required memory space",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void* kml_fft_malloc(size_t n)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Size of the requested memory space (in bytes)."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_malloc",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型,分配所需内存空间",
      "desc_en": "single-precision type, allocate the required memory space",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void* kml_fftf_malloc(size_t n)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "n: Size of the requested memory space (in bytes)."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_free",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "释放由kml_fft_malloc函数申请的内存",
      "desc_en": "Release the memory space allocated by the kml_fft_malloc function",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_free(void *p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: Pointer to the previously allocated memory block."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_free",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "释放由kml_fftf_malloc函数申请的内存",
      "desc_en": "Release the memory space allocated by the kml_fftf_malloc function",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_free(void *p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: Pointer to the previously allocated memory block."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_destroy_plan",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型，释放FFT变换plan的所有内存",
      "desc_en": "double-precision type, Release all the memory space used by an FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_destroy_plan(kml_fft_plan p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan whose memory space is to be released. Double precision: const kml_fft_plan"
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_destroy_plan",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型，释放FFT变换plan的所有内存",
      "desc_en": "single-precision type, Release all the memory space used by an FFT plan",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_destroy_plan(kml_fftf_plan p)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "p: FFT plan whose memory space is to be released. Single precision: const kml_fftf_plan"
      ],
      "return": ""
    },
    {
      "name": "kml_fft_init_threads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型，初始化线程",
      "desc_en": "double-precision type, initialize thread",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "int kml_fft_init_threads(void)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        ""
      ],
      "return": "If the kml_fft(f)_init_threads function is executed successfully, 1 is returned. Otherwise, 0 is returned."
    },
    {
      "name": "kml_fftf_init_threads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型，初始化线程",
      "desc_en": "single-precision type, initialize thread",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "int kml_fftf_init_threads(void)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        ""
      ],
      "return": "If the kml_fft(f)_init_threads function is executed successfully, 1 is returned. Otherwise, 0 is returned."
    },
    {
      "name": "kml_fft_plan_with_nthreads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型，指定FFT库接口函数执行的线程数",
      "desc_en": "double-precision type, specify the number of threads executed by the FFT library interface function",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_plan_with_nthreads(int nthreads)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "nthreads: Number of threads used for executing the subsequent FFT plan."
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_plan_with_nthreads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型，指定FFT库接口函数执行的线程数",
      "desc_en": "single-precision type, specify the number of threads executed by the FFT library interface function",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_plan_with_nthreads(int nthreads)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        "nthreads: Number of threads used for executing the subsequent FFT plan."
      ],
      "return": ""
    },
    {
      "name": "kml_fft_cleanup_threads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "双精度类型，释放多线程框架相关的资源",
      "desc_en": "double-precision type, release resources related to the multi-thread framework",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fft_cleanup_threads(void)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        ""
      ],
      "return": ""
    },
    {
      "name": "kml_fftf_cleanup_threads",
      "library": "KML_FFT",
      "headerfile": "",
      "desc_cn": "单精度类型，释放多线程框架相关的资源",
      "desc_en": "single-precision type, release resources related to the multi-thread framework",
      "benefit_cn": "通过Neon指令优化、内联汇编等方法，充分利用鲲鹏架构下的寄存器特点，实现了在鲲鹏服务器上的性能提升",
      "benefit_en": "Using methods such as NEON instruction optimization and inline assembly. It makes full use of register features in the Kunpeng architecture to improve performance of Kunpeng servers",
      "func_name": "void kml_fftf_cleanup_threads(void)",
      "headerfile_desc": "kfft.h",
      "parameters": [
        ""
      ],
      "return": ""
    },
    {
      "name": "sgetrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度实数类型矩阵的LU分解",
      "desc_en": "LU factorization of single precision real type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵LU分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix LU factorization performance",
      "func_name": "void sgetrf_(const int *m, const int *n, float *a, const int *lda, int *ipiv, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save matrix A before calling. After this function is called, the factorization results L and U are saved, and the diagonal elements of L (all are 1) are not saved.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is min(m, n). For 1 <= i <= min(m, n), row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of matrix U is 0. The matrix factorization is complete, but U is singular. As a result, an error of dividing by zero occurs when a system of linear equations is solved."
      ],
      "return": ""
    },
    {
      "name": "dgetrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度实数类型矩阵的LU分解",
      "desc_en": "LU factorization of double precision real type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵LU分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix LU factorization performance",
      "func_name": "void dgetrf_(const int *m, const int *n, double *a, const int *lda, int *ipiv, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save matrix A before calling. After this function is called, the factorization results L and U are saved, and the diagonal elements of L (all are 1) are not saved.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is min(m, n). For 1 <= i <= min(m, n), row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of matrix U is 0. The matrix factorization is complete, but U is singular. As a result, an error of dividing by zero occurs when a system of linear equations is solved."
      ],
      "return": ""
    },
    {
      "name": "cgetrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵的LU分解",
      "desc_en": "LU factorization of single precision complex type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵LU分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix LU factorization performance",
      "func_name": "void cgetrf_(const int *m, const int *n, float _Complex *a, const int *lda, int *ipiv, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save matrix A before calling. After this function is called, the factorization results L and U are saved, and the diagonal elements of L (all are 1) are not saved.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is min(m, n). For 1 <= i <= min(m, n), row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of matrix U is 0. The matrix factorization is complete, but U is singular. As a result, an error of dividing by zero occurs when a system of linear equations is solved."
      ],
      "return": ""
    },
    {
      "name": "zgetrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵的LU分解",
      "desc_en": "LU factorization of double precision complex type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵LU分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix LU factorization performance",
      "func_name": "void zgetrf_(const int *m, const int *n, double _Complex *a, const int *lda, int *ipiv, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save matrix A before calling. After this function is called, the factorization results L and U are saved, and the diagonal elements of L (all are 1) are not saved.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is min(m, n). For 1 <= i <= min(m, n), row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of matrix U is 0. The matrix factorization is complete, but U is singular. As a result, an error of dividing by zero occurs when a system of linear equations is solved."
      ],
      "return": ""
    },
    {
      "name": "sgeqrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度实数类型矩阵的QR分解",
      "desc_en": "QR factorization of single precision real type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵QR分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix QR factorization performance",
      "func_name": "void sgeqrf_(const int *m, const int *n, float *a, const int *lda, float *tau, float *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save the matrix A to be factorized before calling this function. After this function is invoked, a matrix R with a size of min(m,n)*n (when m≥n, R is an upper triangular matrix) is stored on and above the diagonal. Elements below the diagonal and tau jointly represent an orthogonal matrix Q (see the NOTE).",
        "lda: Leading dimension of the matrix A. lda ≥ max(1, m).",
        "tau: Elementary reflection coefficient. Its length is min(m,n). For details, see the NOTE.",
        "work: Temporary storage space. After this interface is called with lwork = -1, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "dgeqrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度实数类型矩阵的QR分解",
      "desc_en": "QR factorization of double precision real type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵QR分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix QR factorization performance",
      "func_name": "void dgeqrf_(const int *m, const int *n, double *a, const int *lda, double *tau, double *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save the matrix A to be factorized before calling this function. After this function is invoked, a matrix R with a size of min(m,n)*n (when m≥n, R is an upper triangular matrix) is stored on and above the diagonal. Elements below the diagonal and tau jointly represent an orthogonal matrix Q (see the NOTE).",
        "lda: Leading dimension of the matrix A. lda ≥ max(1, m).",
        "tau: Elementary reflection coefficient. Its length is min(m,n). For details, see the NOTE.",
        "work: Temporary storage space. After this interface is called with lwork = -1, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "cgeqrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度复数类型矩阵的QR分解",
      "desc_en": "QR factorization of single precision complex type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵QR分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix QR factorization performance",
      "func_name": "void cgeqrf_(const int *m, const int *n, float _Complex *a, const int *lda, float _Complex *tau, float _Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save the matrix A to be factorized before calling this function. After this function is invoked, a matrix R with a size of min(m,n)*n (when m≥n, R is an upper triangular matrix) is stored on and above the diagonal. Elements below the diagonal and tau jointly represent an orthogonal matrix Q (see the NOTE).",
        "lda: Leading dimension of the matrix A. lda ≥ max(1, m).",
        "tau: Elementary reflection coefficient. Its length is min(m,n). For details, see the NOTE.",
        "work: Temporary storage space. After this interface is called with lwork = -1, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "zgeqrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度复数类型矩阵的QR分解",
      "desc_en": "QR factorization of double precision complex type matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵QR分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix QR factorization performance",
      "func_name": "void zgeqrf_(const int *m, const int *n, double _Complex *a, const int *lda, double _Complex *tau, double _Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix A",
        "n: Number of columns in matrix A",
        "a: Save the matrix A to be factorized before calling this function. After this function is invoked, a matrix R with a size of min(m,n)*n (when m≥n, R is an upper triangular matrix) is stored on and above the diagonal. Elements below the diagonal and tau jointly represent an orthogonal matrix Q (see the NOTE).",
        "lda: Leading dimension of the matrix A. lda ≥ max(1, m).",
        "tau: Elementary reflection coefficient. Its length is min(m,n). For details, see the NOTE.",
        "work: Temporary storage space. After this interface is called with lwork = -1, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "spotrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度实对称正定矩阵的Cholesky分解",
      "desc_en": "Cholesky decomposition of single precision real symmetric positive definite matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵Cholesky分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix Cholesky decomposition performance",
      "func_name": "void spotrf_(const char *uplo, const int *n, float *a, const int *lda, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Order of matrix A. n >= 0.",
        "a: Save the matrix A to be factorized before calling this function. When uplo = 'U', the upper triangular matrix U is saved for the diagonal and the part above the diagonal after the function is called. When uplo = 'L', the lower triangular matrix L is saved for the diagonal and the part below the diagonal after the function is called.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "info: Execution result: 0: The exit is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The principal minor of order info in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "dpotrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度实对称正定矩阵的Cholesky分解",
      "desc_en": "Cholesky decomposition of double precision real symmetric positive definite matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵Cholesky分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix Cholesky decomposition performance",
      "func_name": "void dpotrf_(const char *uplo, const int *n, double *a, const int *lda, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Order of matrix A. n >= 0.",
        "a: Save the matrix A to be factorized before calling this function. When uplo = 'U', the upper triangular matrix U is saved for the diagonal and the part above the diagonal after the function is called. When uplo = 'L', the lower triangular matrix L is saved for the diagonal and the part below the diagonal after the function is called.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "info: Execution result: 0: The exit is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The principal minor of order info in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "cpotrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "单精度Hermite正定矩阵的Cholesky分解",
      "desc_en": "Cholesky decomposition of single precision Hermite positive definite matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵Cholesky分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix Cholesky decomposition performance",
      "func_name": "void cpotrf_(const char *uplo, const int *n, float _Complex *a, const int *lda, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Order of matrix A. n >= 0.",
        "a: Save the matrix A to be factorized before calling this function. When uplo = 'U', the upper triangular matrix U is saved for the diagonal and the part above the diagonal after the function is called. When uplo = 'L', the lower triangular matrix L is saved for the diagonal and the part below the diagonal after the function is called.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "info: Execution result: 0: The exit is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The principal minor of order info in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "zpotrf",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度Hermite正定矩阵的Cholesky分解",
      "desc_en": "Cholesky decomposition of double precision Hermite positive definite matrices",
      "benefit_cn": "基于鲲鹏框架，实现矩阵Cholesky分解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates matrix Cholesky decomposition performance",
      "func_name": "void zpotrf_(const char *uplo, const int *n, double _Complex *a, const int *lda, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Order of matrix A. n >= 0.",
        "a: Save the matrix A to be factorized before calling this function. When uplo = 'U', the upper triangular matrix U is saved for the diagonal and the part above the diagonal after the function is called. When uplo = 'L', the lower triangular matrix L is saved for the diagonal and the part below the diagonal after the function is called.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "info: Execution result: 0: The exit is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The principal minor of order info in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "sppsv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为单精度实对称正定矩阵，并以压缩格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a single precision real symmetric positive definite matrix and packed storage",
      "benefit_cn": "基于鲲鹏框架，实现对称正定矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with symmetric positive definite matrices",
      "func_name": "void sppsv_(const char *uplo, const int *n, const int *nrhs, float *ap, float *b, const int *ldb, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Number of rows or columns in the symmetric matrix A.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs ≥ 0.",
        "ap: Before calling, symmetric matrix A is stored in the packed format. For details, see the description of the packed storage matrix in the Data Structures. After calling, the Cholesky factorization result (U or L) of matrix A is stored in the packed format.",
        "b: Matrix B on the right before calling. Solved matrix X after calling.",
        "ldb: Leading dimension of matrix B. ldb ≥ max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The sequence principal minor of the info size in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "dppsv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为双精度实对称正定矩阵，并以压缩格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a double precision real symmetric positive definite matrix and packed storage",
      "benefit_cn": "基于鲲鹏框架，实现对称正定矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with symmetric positive definite matrices",
      "func_name": "void dppsv_(const char *uplo, const int *n, const int *nrhs, double *ap, double *b, const int *ldb, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Number of rows or columns in the symmetric matrix A.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs ≥ 0.",
        "ap: Before calling, symmetric matrix A is stored in the packed format. For details, see the description of the packed storage matrix in the Data Structures. After calling, the Cholesky factorization result (U or L) of matrix A is stored in the packed format.",
        "b: Matrix B on the right before calling. Solved matrix X after calling.",
        "ldb: Leading dimension of matrix B. ldb ≥ max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The sequence principal minor of the info size in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "cppsv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为单精度Hermite正定矩阵，并以压缩格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a single precision Hermite positive definite matrix and packed storage",
      "benefit_cn": "基于鲲鹏框架，实现对称正定矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with symmetric positive definite matrices",
      "func_name": "void cppsv_(const char *uplo, const int *n, const int *nrhs, float _Complex *ap, float _Complex *b, const int *ldb, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Number of rows or columns in the symmetric matrix A.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs ≥ 0.",
        "ap: Before calling, symmetric matrix A is stored in the packed format. For details, see the description of the packed storage matrix in the Data Structures. After calling, the Cholesky factorization result (U or L) of matrix A is stored in the packed format.",
        "b: Matrix B on the right before calling. Solved matrix X after calling.",
        "ldb: Leading dimension of matrix B. ldb ≥ max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The sequence principal minor of the info size in matrix A is not positive definite, and the factorization cannot be completed."
      ],
      "return": ""
    },
    {
      "name": "zppsv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为双精度Hermite正定矩阵，并以压缩格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a double precision Hermite positive definite matrix and packed storage",
      "benefit_cn": "基于鲲鹏框架，实现对称正定矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with symmetric positive definite matrices",
      "func_name": "void zppsv_(const char *uplo, const int *n, const int *nrhs, double _Complex *ap, double _Complex *b, const int *ldb, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Number of rows or columns in the symmetric matrix A.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs ≥ 0.",
        "ap: Before calling, symmetric matrix A is stored in the packed format. For details, see the description of the packed storage matrix in the Data Structures. After calling, the Cholesky factorization result (U or L) of matrix A is stored in the packed format.",
        "b: Matrix B on the right before calling. Solved matrix X after calling.",
        "ldb: Leading dimension of matrix B. ldb ≥ max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The sequence principal minor of the info size in matrix A is not positive definite, and the factorization cannot be completed. Outpu"
      ],
      "return": ""
    },
    {
      "name": "sgesv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为一般单精度实矩阵，并以完全格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a single precision real general matrix and full storage",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with general matrices",
      "func_name": "void sgesv_(const int *n,const int *nrhs,float *a,const int *lda,int *ipiv, float *b, const int *ldb,int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Order of matrix A. The value of n must be greater than or equal to 0.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs >= 0.",
        "a: The matrix dimension is (lda, n). Input: n*n coefficient matrix A. Output:  L and U  in A = P * L * U are stored. The unit diagonal element of L is not stored.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "ipiv: The array dimension is n. Array storing the pivot indices of the permutation matrix P. The ith row and is interchanged with the ipiv(i)th row of the matrix.",
        "b: The matrix dimension is (ldb, nrhs). Input: matrix B on the right of n * nrhs. Output: When info = 0, is the solution matrix X of n * nrhs.",
        "ldb: Leading dimension of the matrix B. ldb >= max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: U(info.info) = 0. The factorization is complete, but the solution cannot be completed due to the singularity of U."
      ],
      "return": ""
    },
    {
      "name": "dgesv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为一般双精度实矩阵，并以完全格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a double precision real general matrix and full storage",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with general matrices",
      "func_name": "void dgesv_(const int *n,const int *nrhs,double *a,const int *lda,int *ipiv, double *b, const int *ldb,int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Order of matrix A. The value of n must be greater than or equal to 0.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs >= 0.",
        "a: The matrix dimension is (lda, n). Input: n*n coefficient matrix A. Output:  L and U  in A = P * L * U are stored. The unit diagonal element of L is not stored.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "ipiv: The array dimension is n. Array storing the pivot indices of the permutation matrix P. The ith row and is interchanged with the ipiv(i)th row of the matrix.",
        "b: The matrix dimension is (ldb, nrhs). Input: matrix B on the right of n * nrhs. Output: When info = 0, is the solution matrix X of n * nrhs.",
        "ldb: Leading dimension of the matrix B. ldb >= max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: U(info.info) = 0. The factorization is complete, but the solution cannot be completed due to the singularity of U."
      ],
      "return": ""
    },
    {
      "name": "cgesv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为一般单精度复矩阵，并以完全格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a single precision complex general matrix and full storage",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with general matrices",
      "func_name": "void cgesv_(const int *n,const int *nrhs,float_Complex *a,const int *lda,int *ipiv, float_Complex *b, const int *ldb,int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Order of matrix A. The value of n must be greater than or equal to 0.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs >= 0.",
        "a: The matrix dimension is (lda, n). Input: n*n coefficient matrix A. Output:  L and U  in A = P * L * U are stored. The unit diagonal element of L is not stored.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "ipiv: The array dimension is n. Array storing the pivot indices of the permutation matrix P. The ith row and is interchanged with the ipiv(i)th row of the matrix.",
        "b: The matrix dimension is (ldb, nrhs). Input: matrix B on the right of n * nrhs. Output: When info = 0, is the solution matrix X of n * nrhs.",
        "ldb: Leading dimension of the matrix B. ldb >= max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: U(info.info) = 0. The factorization is complete, but the solution cannot be completed due to the singularity of U."
      ],
      "return": ""
    },
    {
      "name": "zgesv",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "求解线性方程组Ax=b,其中A为一般双精度复矩阵，并以完全格式存储",
      "desc_en": "Solve a system of linear equations Ax=b where A is a double precision complex general matrix and full storage",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵线性方程组的求解性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the solution performance of linear equations with general matrices",
      "func_name": "void zgesv_(const int *n,const int *nrhs, double_Complex *a,const int *lda,int *ipiv, double_Complex *b, const int *ldb,int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Order of matrix A. The value of n must be greater than or equal to 0.",
        "nrhs: Number of items on the right, that is, the number of columns in matrix B. nrhs >= 0.",
        "a: The matrix dimension is (lda, n). Input: n*n coefficient matrix A. Output:  L and U  in A = P * L * U are stored. The unit diagonal element of L is not stored.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n).",
        "ipiv: The array dimension is n. Array storing the pivot indices of the permutation matrix P. The ith row and is interchanged with the ipiv(i)th row of the matrix.",
        "b: The matrix dimension is (ldb, nrhs). Input: matrix B on the right of n * nrhs. Output: When info = 0, is the solution matrix X of n * nrhs.",
        "ldb: Leading dimension of the matrix B. ldb >= max(1, n).",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: U(info.info) = 0. The factorization is complete, but the solution cannot be completed due to the singularity of U."
      ],
      "return": ""
    },
    {
      "name": "sgetri",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据sgetrf得到的分解结果，计算逆矩阵",
      "desc_en": "Calculate the inverse matrix according to the decomposition result obtained by sgetrf",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵求逆矩阵性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates general matrix inversion matrix performance",
      "func_name": "void sgetri_(const int *n, float *a, const int *lda, const int *ipiv, float *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Number of rows or columns in square matrix A",
        "a: Saves the LU factorization result of matrix A (obtained using ?getrf) before calling. Saves the inverse matrix of matrix A after calling.",
        "lda: Leading dimension of matrix A. lda >= max(1, n).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is n. For 1<=ipiv<=n, row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "work: Temporary storage space. After lwork=-1 is called, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of U is 0, and the matrix cannot be inverted."
      ],
      "return": ""
    },
    {
      "name": "dgetri",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据dgetrf得到的分解结果，计算逆矩阵",
      "desc_en": "Calculate the inverse matrix according to the decomposition result obtained by dgetrf",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵求逆矩阵性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates general matrix inversion matrix performance",
      "func_name": "void dgetri_(const int *n, double *a, const int *lda, const int *ipiv, double *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Number of rows or columns in square matrix A",
        "a: Saves the LU factorization result of matrix A (obtained using ?getrf) before calling. Saves the inverse matrix of matrix A after calling.",
        "lda: Leading dimension of matrix A. lda >= max(1, n).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is n. For 1<=ipiv<=n, row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "work: Temporary storage space. After lwork=-1 is called, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of U is 0, and the matrix cannot be inverted."
      ],
      "return": ""
    },
    {
      "name": "cgetri",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据cgetrf得到的分解结果，计算逆矩阵",
      "desc_en": "Calculate the inverse matrix according to the decomposition result obtained by cgetrf",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵求逆矩阵性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates general matrix inversion matrix performance",
      "func_name": "void cgetri_(const int *n, float _Complex *a, const int *lda, const int *ipiv, float _Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Number of rows or columns in square matrix A",
        "a: Saves the LU factorization result of matrix A (obtained using ?getrf) before calling. Saves the inverse matrix of matrix A after calling.",
        "lda: Leading dimension of matrix A. lda >= max(1, n).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is n. For 1<=ipiv<=n, row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "work: Temporary storage space. After lwork=-1 is called, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of U is 0, and the matrix cannot be inverted."
      ],
      "return": ""
    },
    {
      "name": "zgetri",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据zgetrf得到的分解结果，计算逆矩阵",
      "desc_en": "Calculate the inverse matrix according to the decomposition result obtained by zgetrf",
      "benefit_cn": "基于鲲鹏框架，实现一般矩阵求逆矩阵性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates general matrix inversion matrix performance",
      "func_name": "void zgetri_(const int *n, double _Complex *a, const int *lda, const int *ipiv, double _Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "n: Number of rows or columns in square matrix A",
        "a: Saves the LU factorization result of matrix A (obtained using ?getrf) before calling. Saves the inverse matrix of matrix A after calling.",
        "lda: Leading dimension of matrix A. lda >= max(1, n).",
        "ipiv: An array containing pivot indices obtained from ?getrf. Its length is n. For 1<=ipiv<=n, row i and row ipiv[i-1] of the matrix are interchanged during factorization.",
        "work: Temporary storage space. After lwork=-1 is called, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1, the value of lwork must be greater than or equal to n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0: The info-th element on the diagonal of U is 0, and the matrix cannot be inverted."
      ],
      "return": ""
    },
    {
      "name": "dsyevd",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "双精度实对称矩阵的全部特征值和特征向量（可选）",
      "desc_en": "All eigenvalues and eigenvectors(optional) of double precision real symmetric matrices",
      "benefit_cn": "基于鲲鹏框架，实现对称矩阵全部特征值及特征向量性能加速",
      "benefit_en": "Basic Kunpeng architecture accelerates the performance of all eigenvalues and eigenvectors of the symmetric matrix",
      "func_name": "void dsyevd_(const char *jobz, const char *uplo, const int *n, double *a, const int *lda, double *w, double *work, const int *lwork, int *iwork, const int *liwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "jobz: 'N': Computes only eigenvectors. 'V': Computes eigenvalues and eigenvectors at the same time.",
        "uplo: 'U': saves the upper triangular matrix of A. 'L': saves the lower triangular matrix of A.",
        "n: Number of rows or columns in the symmetric matrix A.",
        "a: Saves the symmetric matrix to be factorized before calling. Saves the eigenvectors after calling.",
        "lda: Leading dimension of the matrix A. lda >= max(1, n)",
        "w: Eigenvalues in ascending order. The length is n.",
        "work: Temporary storage space. After calling, work[0] is the optimal lwork value.",
        "lwork: Length of the work array. If lwork = -1, the optimal work size is queried and the result is saved in work[0]. If lwork ≠ -1: When n <= 1, lwork >= 1.  When jobz = 'N' and n > 1, lwork >= 2*n + 1. When jobz = 'V' and n > 1, lwork >= 1 + 6*n + 2*n*n.",
        "iwork: Temporary storage space. After this interface is called with lwork = -1, iwork[0] is the optimal liwork value.",
        "liwork: Length of the iwork array. If liwork=-1, the optimal iwork size is queried and the result is saved in iwork[0]. Otherwise: When jobz = 'N' or n <= 1, liwork >= 1. When jobz = 'V' and n > 1, liwork >= 3 + 5*n.",
        "info: Execution result: 0: The execution is successful. Smaller than 0: The value of the -info-th parameter is invalid. Greater than 0, the jobz='N' calculation cannot be converged and the info/(n+1)-th eigenvalue cannot be calculated."
      ],
      "return": ""
    },
    {
      "name": "sorglq",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据sgelqf的计算结果，生成具有正交行的单精度实矩阵Q，即该矩阵是K个N阶基本变换矩阵的乘积的前M行",
      "desc_en": "Single precision real matrix Q with orthogonal rows is generated according to the result obtained by sgelqf,and the matrix is defined as the first M rows of the product of K elementary reflectors of order N",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void sorglq_(const int *m, const int *n, const int *k, float *a, const int *lda, const float *tau, float *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. n >= m.",
        "k: Number of elementary reflectors whose product defines matrix Q. m ≥ k ≥ 0",
        "a: Dimension (lda, n). When a is used as an input, row i must contain the basic reflector H(i) vector, where H(i) is returned by ?gelqf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the elementary reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, m). For the optimal performance, lwork >= m*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: nfo = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "dorglq",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据dgelqf的计算结果，生成具有正交行的双精度实矩阵Q，即该矩阵是K个N阶基本变换矩阵的乘积的前M行",
      "desc_en": "Double precision real matrix Q with orthogonal rows is generated according to the result obtained by dgelqf,and the matrix is defined as the first M rows of the product of K elementary reflectors of order N",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void dorglq_(const int *m, const int *n, const int *k, double *a, const int *lda, const double *tau, double *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. n >= m.",
        "k: Number of elementary reflectors whose product defines matrix Q. m ≥ k ≥ 0",
        "a: Dimension (lda, n). When a is used as an input, row i must contain the basic reflector H(i) vector, where H(i) is returned by ?gelqf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the elementary reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, m). For the optimal performance, lwork >= m*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: nfo = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "cunglq",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据cgelqf的计算结果，生成具有正交行的单精度复矩阵Q，即该矩阵是K个N阶基本变换矩阵的乘积的前M行",
      "desc_en": "Single precision complex matrix Q with orthogonal rows is generated according to the result obtained by cgelqf,and the matrix is defined as the first M rows of the product of K elementary reflectors of order N",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void cunglq_(const int *m, const int *n, const int *k, float_Complex *a, const int *lda, const float_Complex *tau, float_Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. n >= m.",
        "k: Number of elementary reflectors whose product defines matrix Q. m ≥ k ≥ 0",
        "a: Dimension (lda, n). When a is used as an input, row i must contain the basic reflector H(i) vector, where H(i) is returned by ?gelqf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the elementary reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, m). For the optimal performance, lwork >= m*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: nfo = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "zunglq",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据zgelqf的计算结果，生成具有正交行的双精度复矩阵Q，即该矩阵是K个N阶基本变换矩阵的乘积的前M行",
      "desc_en": "Double precision complex matrix Q with orthogonal rows is generated according to the result obtained by zgelqf,and the matrix is defined as the first M rows of the product of K elementary reflectors of order N",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void zunglq_(const int *m, const int *n, const int *k, double_Complex *a, const int *lda, const double_Complex *tau, double_Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. n >= m.",
        "k: Number of elementary reflectors whose product defines matrix Q. m ≥ k ≥ 0",
        "a: Dimension (lda, n). When a is used as an input, row i must contain the basic reflector H(i) vector, where H(i) is returned by ?gelqf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the elementary reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, m). For the optimal performance, lwork >= m*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: nfo = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "sorgqr",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据sgeqrf的计算结果，生成具有正交行的单精度实矩阵Q，即该矩阵是K个M阶基本变换矩阵的乘积的前N行",
      "desc_en": "Single precision real matrix Q with orthogonal rows is generated according to the result obtained by sgeqrf,and the matrix is defined as the first N rows of the product of K elementary reflectors of order M",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void sorgqr_(const int *m, const int *n, const int *k, float *a, const int *lda, const float *tau, float *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. m >= n >= 0.",
        "k: Number of elementary reflectors whose product defines matrix Q. n ≥ k ≥ 0",
        "a: Dimension (lda, n). If a is used as an input, column i must contain the elementary reflector H(i) vector, where H(i) is returned by ?geqrf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the basic reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, n). For the optimal performance, lwork >= n*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: info = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "dorgqr",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据dgeqrf的计算结果，生成具有正交行的单精度实矩阵Q，即该矩阵是K个M阶基本变换矩阵的乘积的前N行",
      "desc_en": "Double precision real matrix Q with orthogonal rows is generated according to the result obtained by sgeqrf,and the matrix is defined as the first N rows of the product of K elementary reflectors of order M",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void dorgqr_(const int *m, const int *n, const int *k, double *a, const int *lda, const double *tau, double *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. m >= n >= 0.",
        "k: Number of elementary reflectors whose product defines matrix Q. n ≥ k ≥ 0",
        "a: Dimension (lda, n). If a is used as an input, column i must contain the elementary reflector H(i) vector, where H(i) is returned by ?geqrf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the basic reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, n). For the optimal performance, lwork >= n*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: info = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "cungqr",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据cgeqrf的计算结果，生成具有正交行的单精度实矩阵Q，即该矩阵是K个M阶基本变换矩阵的乘积的前N行",
      "desc_en": "Single precision complex matrix Q with orthogonal rows is generated according to the result obtained by sgeqrf,and the matrix is defined as the first N rows of the product of K elementary reflectors of order M",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void cungqr_(const int *m, const int *n, const int *k, float_Complex *a, const int *lda, const float_Complex *tau, float_Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. m >= n >= 0.",
        "k: Number of elementary reflectors whose product defines matrix Q. n ≥ k ≥ 0",
        "a: Dimension (lda, n). If a is used as an input, column i must contain the elementary reflector H(i) vector, where H(i) is returned by ?geqrf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the basic reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, n). For the optimal performance, lwork >= n*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: info = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "zungqr",
      "library": "KML_LAPACK",
      "headerfile": "",
      "desc_cn": "根据zgeqrf的计算结果，生成具有正交行的单精度实矩阵Q，即该矩阵是K个M阶基本变换矩阵的乘积的前N行",
      "desc_en": "Double precision complex matrix Q with orthogonal rows is generated according to the result obtained by sgeqrf,and the matrix is defined as the first N rows of the product of K elementary reflectors of order M",
      "benefit_cn": "基于鲲鹏框架，实现生成正交矩阵Q性能加速",
      "benefit_en": "Basic Kunpeng architecture for performance acceleration with quadrature matrix Q",
      "func_name": "void zungqr_(const int *m, const int *n, const int *k, double_Complex *a, const int *lda, const double_Complex *tau, double_Complex *work, const int *lwork, int *info)",
      "headerfile_desc": "klapack.h",
      "parameters": [
        "m: Number of rows in matrix Q. m >= 0.",
        "n: Number of columns in matrix Q. m >= n >= 0.",
        "k: Number of elementary reflectors whose product defines matrix Q. n ≥ k ≥ 0",
        "a: Dimension (lda, n). If a is used as an input, column i must contain the elementary reflector H(i) vector, where H(i) is returned by ?geqrf. When a is used as an output, it is an m*n matrix Q.",
        "lda: Leading dimension of the matrix A. lda >= max(1, m).",
        "tau: tau(i) must contain the constant factor of the basic reflector H(i), which is returned by ?gelqf.",
        "work: Number of dimensions max(1, lwork). If info = 0, work(1) returns the optimal lwork value.",
        "lwork: Number of dimensions of the work array. lwork >= max(1, n). For the optimal performance, lwork >= n*nb, where nb is the optimal block size. If lwork = -1, this routine calculates only the optimal size of the work array and returns the first value of the work array.",
        "info: info = 0: The execution is successful. info < 0: If the value of info is -i, the ith parameter is invalid."
      ],
      "return": ""
    },
    {
      "name": "HMPPS_Abs_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Abs_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes absolute values of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_8u16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Add_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddC_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds a constant value to each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProduct_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of two vectors to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProductC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of a vector and a constant to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AddProductC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Adds product of a vector and a constant to the accumulator vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_And_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AndC_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise AND of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse tangent of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse tangent of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse tangent of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse tangent of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan2_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function computes the angle between the X axis and the line from the origin to the point (X, Y).",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Arctan2_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function computes the angle between the X axis and the line from the origin to the point (X, Y).",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CartToPolar_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the elements of a complex vector to polar coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CartToPolar_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the elements of a complex vector to polar coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CartToPolar_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the elements of a complex vector to polar coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CartToPolar_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the elements of a complex vector to polar coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CartToPolar_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the elements of a complex vector to polar coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Conj_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex conjugate of vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_8u32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16u32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_24u32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_24u32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_8s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_8s16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_8s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s16f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_24s32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_24s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64s64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16f32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f16f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s32f_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s64f_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s32f_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s64f_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32s24s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32u24u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f24u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f24s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_8u8s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16s8s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_16f16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f8s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_32f32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f8s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convert_64f64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_32sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Copy_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data copy interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CountInRange_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the number of elements within a specified range.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cos_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cosine of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cos_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cosine of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cos_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cosine of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cos_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cosine of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CplxToReal_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Complex-to-real data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CplxToReal_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Complex-to-real data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CplxToReal_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Complex-to-real data conversion interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cubrt_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cube root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Cubrt_32s16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the cube root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32s16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_64s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivC_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivCRev_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides a constant value by each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivCRev_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides a constant value by each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivCRev_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides a constant value by each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DivCRev_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides a constant value by each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Div_Round_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Divides the elements of two vectors with rounding.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16s64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16s16sc64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32f64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32f32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32f32fc64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_64f64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16sc64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32fc64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_16s32s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DotProd_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the dot product of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Exp_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes e raised to the power of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FindNearest_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Finds table elements that are closest to the elements of the specified vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FindNearestOne_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Finds an element of the table which is closest to the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Flip_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the order of elements in a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Imag_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the imaginary part of each element of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Imag_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the imaginary part of each element of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Imag_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the imaginary part of each element of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Ln_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the natural logarithm of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Log10_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates the common (base-10) logarithm of each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Log10_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates the common (base-10) logarithm of each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Log10_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates the common (base-10) logarithm of each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Log10_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates the common (base-10) logarithm of each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_LShiftC_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the left.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_16sc32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Magnitude_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the magnitude of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Max_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and stores the result in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Max_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and stores the result in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Max_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and stores the result in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Max_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and stores the result in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbs_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and stores the result in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbs_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and stores the result in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbs_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and stores the result in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbs_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and stores the result in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbsIndx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and the index of the corresponding element, storing the value in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxAbsIndx_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum absolute value of the input vector src and the index of the corresponding element, storing the value in maxAbs.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxEvery_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes maximum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxIndx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and the index of the maximum element, storing the value in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxIndx_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and the index of the maximum element, storing the value in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxIndx_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and the index of the maximum element, storing the value in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MaxIndx_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum value of the input vector src and the index of the maximum element, storing the value in max.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mean_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean (average) of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MeanStdDev_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean value and the standard deviation value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MeanStdDev_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean value and the standard deviation value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MeanStdDev_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean value and the standard deviation value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MeanStdDev_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the mean value and the standard deviation value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Min_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Min_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Min_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Min_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbs_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbs_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbs_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbs_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbsIndx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector and the index of the corresponding element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinAbsIndx_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum absolute value of a vector and the index of the corresponding element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinEvery_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes minimum value for each pair of elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinIndx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector and the index of the minimum element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinIndx_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector and the index of the minimum element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinIndx_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector and the index of the minimum element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinIndx_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the minimum value of a vector and the index of the minimum element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMax_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MinMaxIndx_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns the maximum and minimum values of a vector and the indexes of the corresponding elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MMul_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of matrix A and matrix B",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MMul_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of matrix A and matrix B",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MMul_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of matrix A and matrix B",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MMul_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of matrix A and matrix B",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_32sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Move_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data move interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_8u16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32f32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32f32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16u16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Mul_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies elements of two vectors",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_Low_32f16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32f16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_64f64s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_MulC_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies each element of a vector by a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Normalize_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Normalize elements of a real or complex vector using offest and division operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_32fc32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_Inf_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_32fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L1_16s64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_32fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Norm_L2Sqr_16s64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of a vector and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_32fc32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_Inf_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_32fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L1_16s64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_32fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_64fc64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_NormDiff_L2Sqr_16s64s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the C, L1, L2, or L2Sqr norm of two vectors' difference and store the reuslt in norm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Not_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise NOT of the vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Not_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise NOT of the vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Not_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise NOT of the vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Not_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise NOT of the vector elements.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Or_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_OrC_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise OR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_16sc32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Phase_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the phase angles of elements of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PolarToCart_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the polar form magnitude/phase pairs stored in input vectors to Cartesian coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PolarToCart_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the polar form magnitude/phase pairs stored in input vectors to Cartesian coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PolarToCart_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the polar form magnitude/phase pairs stored in input vectors to Cartesian coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PolarToCart_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the polar form magnitude/phase pairs stored in input vectors to Cartesian coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PolarToCart_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Converts the polar form magnitude/phase pairs stored in input vectors to Cartesian coordinate form.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pow_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function raises each element of vector src1 to the power of the corresponding element",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pow_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function raises each element of vector src1 to the power of the corresponding element",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_16sc32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_PowerSpectr_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectrum of a complex vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32f_A11",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32f_A21",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32f_A24",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_64f_A26",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_64f_A50",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_64f_A53",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32fc_A11",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32fc_A21",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Powx_32fc_A24",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes powx of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pwelch_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectral density estimation of a signal vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pwelch_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectral density estimation of a signal vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pwelch_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectral density estimation of a signal vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Pwelch_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the power spectral density estimation of a signal vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGauss_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandGauss with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGauss_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandGauss with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGauss_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandGauss with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGauss_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandGauss with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussInit_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy with a given mean, stdDev, seed, and size.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussInit_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy with a given mean, stdDev, seed, and size.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy with a given mean, stdDev, seed, and size.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy with a given mean, stdDev, seed, and size.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussRelease_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussRelease_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandGaussRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandGauss policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformInit_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy with a given low, high and seed.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformInit_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy with a given low, high and seed.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy with a given low, high and seed.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy with a given low, high and seed.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniform_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandUniform with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniform_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandUniform with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniform_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandUniform with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniform_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a RandUniform with policy.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformRelease_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformRelease_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RandUniformRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "RandUniform policy ralease.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Real_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data get real interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Real_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data get real interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Real_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data get real interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RealToCplx_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data RealToCplx interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RealToCplx_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data RealToCplx interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RealToCplx_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data RealToCplx interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ReplaceNAN_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function reloaces not-a-number(NaN)elements of the source vector with value, other vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ReplaceNAN_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "This function reloaces not-a-number(NaN)elements of the source vector with value, other vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_RShiftC_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shifts bits in vector elements to the right.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleUp_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SampleDown_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Up-sample: Insert zero-valued samples between neighboring samples of a signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_32sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Set_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Data set value interface.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sin_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes sine of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sin_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes sine of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sin_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes sine of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sin_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes sine of each element of vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortAscend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortDescend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexAscend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortIndexDescend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Rearranges elements of the vector and their indexes.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_64u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_64s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixAscend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_64u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_64s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixDescend_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sorts all elements of a vector using radix sorting algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_64u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexAscend_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_64u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SortRadixIndexDescend_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Indirectly sorts all elements of a vector using radix sorting algorithm",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqr_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_32s16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sqrt_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes a square root of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_StdDev_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the standard deviation of the input vector pSrc.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_StdDev_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the standard deviation of the input vector pSrc.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_StdDev_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the standard deviation of the input vector pSrc.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_StdDev_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the standard deviation of the input vector pSrc.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sub_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts the elements of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubC_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts a constant value from each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_8u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16u_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_8u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16u_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32s_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_16sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SubCRev_32sc_IS",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Subtracts each element of a vector from a constant value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_16s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_16s32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_32s_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_16sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Sum_16sc32sc_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the sum of the elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SumLn_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sums the natural logarithms of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SumLn_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sums the natural logarithms of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SumLn_32f64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sums the natural logarithms of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SumLn_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sums the natural logarithms of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_24u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_64u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_24u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_SwapBytes_64u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Reverses the byte order of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tan_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the tangent of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tan_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the tangent of each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbs_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTAbs_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the absolute values of elements of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTAbsVal_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTInv_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the inverse of vector elements after limiting their magnitudes by the given lower bound.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LT_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GT_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the specified value.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTVal_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_GTVal_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Threshold_LTValGTVal_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs the threshold operation on the elements of a vector by limiting the element values by the",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Tone_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates a tone with a given frequency, phase, and magnitude.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_TopKInit_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the internal buffer required for the TopK function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_TopKInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the internal buffer required for the TopK function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_TopK_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns maximum K values of an array.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_TopK_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Returns maximum K values of an array.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_TopKRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the internal buffer required for the TopK function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Triangle_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Generates the triangle with the specified frequency rFreq, phase and magn.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorJaehne_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Create special vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_VectorSlope_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Creates a slope vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBartlett_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Barlett windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackman_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanStd_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinBlackmanOpt_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Blackman windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHamming_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hamming windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinHann_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Hann windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_16sc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_WinKaiser_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Multiplies a vector by a Kaiser windowing function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Xor_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_16u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_32u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_16u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_XorC_32u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the bitwise XOR of a scalar value and each element of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_64s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_32sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_64sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Zero_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the elements of a vector to zero.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ZeroCrossing_16s32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the specific zero crossing measure according to the parameter zcType.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ZeroCrossing_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the specific zero crossing measure according to the parameter zcType.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CZT_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the frequency response starting at a and stepping by w for m steps.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CZT_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the frequency response starting at a and stepping by w for m steps.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CZT_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the frequency response starting at a and stepping by w for m steps.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CZT_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the frequency response starting at a and stepping by w for m steps.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToCInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToCInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToRInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToRInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the DFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToR_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToR_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes forward and backward DFT.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCRelease_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCRelease_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToCRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToCRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTRToCRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToRRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_DFTCToRRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsDFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToCInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToCInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToRInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToRInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FFT specification structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_32fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_64fc_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToC_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToC_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "The FFT specification structure for complex signals.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToR_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex-to-real FFT of a complex signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToR_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the complex-to-real FFT of a complex signal.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCRelease_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCRelease_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToCRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToCRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTRToCRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToRRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTCToRRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the HmppsFFTPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTShift_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shift zero-frequency component of discrete Fourier transform to center of spectrum",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTShift_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shift zero-frequency component of discrete Fourier transform to center of spectrum",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTShift_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shift zero-frequency component of discrete Fourier transform to center of spectrum",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FFTShift_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Shift zero-frequency component of discrete Fourier transform to center of spectrum",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Goertz_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes DFT for single frequency.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Goertz_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes DFT for single frequency.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Goertz_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes DFT for single frequency.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Goertz_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes DFT for single frequency.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_HilbertInit_32f32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the HilbertPolicy structure",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Hilbert_32f32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Hilbert transform is performed on real sequences to obtain complex sequences.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_HilbertRelease_32f32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of Hilbert",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of AutoCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of AutoCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of AutoCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of AutoCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrNorm_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased auto-correlation of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrNorm_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased auto-correlation of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrNorm_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased auto-correlation of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_AutoCorrNorm_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased auto-correlation of a vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the CorrPolicy structure of CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrNorm_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased cross-correlation of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrNorm_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased cross-correlation of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrNorm_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased cross-correlation of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CrossCorrNorm_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Calculates normal, biased, and unbiased cross-correlation of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CorrRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of AutoCorr or CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CorrRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of AutoCorr or CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CorrRelease_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of AutoCorr or CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_CorrRelease_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of AutoCorr or CrossCorr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ConvInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the ConvPolicy structure",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ConvInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the ConvPolicy structure",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convolve_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs a finite linear convolution of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_Convolve_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs a finite linear convolution of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ConvRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of Convolve",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ConvRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the policy memory of Convolve",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ConvBiased_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes the specified number of elements of the full finite linear convolution of two vectors.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedianInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Get the work buffer for the HMPPS_FilterMedian_xxx function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_32s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_8u_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_16s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_32s_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_32f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedian_64f_I",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes median values for each source vector element.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FilterMedianRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for FilterMedian filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSparseInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the sparse FIR filter policy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSparseGetDlyLine_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Retrieves the delay line contents from the sparse FIR filter policy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSparseSetDlyLine_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the delay line contents in the sparse FIR filter policy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSparse_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Filters a source vector through a sparse FIR filter.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSparseRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for sparse FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the internal buffer required for computation of FIR coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenLowpass_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes lowpass FIR filter coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenHighpass_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes highpass FIR filter coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenBandpass_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes bandpass FIR filter coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenBandstop_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Computes bandstop FIR filter coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRGenRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the internal buffer required for computation of FIR coefficients.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSGetTaps_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Retrieves the tap values from the FIRLMS filter.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSGetTaps32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Retrieves the tap values from the FIRLMS filter.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSGetDlyLine_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Retrieves the delay line contents from the FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSGetDlyLine32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Retrieves the delay line contents from the FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSSetDlyLine_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the delay line contents in the FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSSetDlyLine32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the delay line contents in the FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes a FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSInit32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes a FIRLMSPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMS_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Filters a vector through the FIR least mean squares (LMS) filter.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMS32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Filters a vector through the FIR least mean squares (LMS) filter.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the FIRLMS structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRLMSRelease32f_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases all memory in the FIRLMS structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FIR constant structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FIR constant structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FIR constant structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FIR constant structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRInit32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the FIR constant structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSR32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs single-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRRelease_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRRelease_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRSRRelease32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Release the memory structure for single-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the context structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRInit_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the context structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRInit_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the context structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRInit_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the context structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRInit32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the context structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_16sc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMR32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs multi-rate FIR filtering of a source vector.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the memory structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRRelease_64f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the memory structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRRelease_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the memory structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRRelease_64fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the memory structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_FIRMRRelease32f_32fc",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the memory structure for multi-rate FIR filtering.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixedInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the constant structure for Polyphase fixed factor Resample.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixedInit_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the constant structure for Polyphase fixed factor Resample.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseSetFixedFilter_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the filter coefficients in the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseSetFixedFilter_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Sets the filter coefficients in the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseGetFixedFilter_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Gets the filter coefficients from the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseGetFixedFilter_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Gets the filter coefficients from the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixed_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs polyphase resampling with a fixed factor.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixed_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs polyphase resampling with a fixed factor.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixedRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseFixedRelease_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the constant structure for Polyphase Resample.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseInit_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Initializes the constant structure for Polyphase Resample.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphase_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs polyphase resampling with a non-fixed factor.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphase_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Performs polyphase resampling with a non-fixed factor.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPS_ResamplePolyphaseRelease_16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "信号库函数",
      "desc_en": "Releases the polyphase resampling structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_BGRToYCbCr420_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a BGR image to a YCbCr image with 4:2:0 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_BGRToYCbCr420_8u_AC4P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a BGR image to a YCbCr image with 4:2:0 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_BGRToYUV420_8u_AC4P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a BGR image to a YUV image with 4:2:0 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToBGR_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the BGR color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToBGR_8u_P3C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the BGR color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToBGR_709CSC_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the BGR_709CSC color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToBGR_709CSC_8u_P3C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the BGR_709CSC color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCrCb422_8u_C3C2R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCrCb422 color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCrCb422_8u_P3C2R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCrCb422 color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ComputeThreshold_Otsu_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the value of the Otsu threshold",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s8u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s16u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s16u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s8s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s32u_C1Rs",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8s64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_8u8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16u16s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_16s8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u8u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u16u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u16s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32u32s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s16u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32s16s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f16s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f32u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f32s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_32f32u_C1IR_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_64f8u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_64f8s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_64f16u_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Convert_64f16s_C1R_S",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to convert types",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_AC4C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_AC4C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_AC4C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_AC4C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_AC4C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_AC4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_AC4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_AC4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_AC4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_AC4MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C4CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C4CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C4CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C4CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C4CR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C4C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C4C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C4C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C4C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C4C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C1C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to copy image data",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C1C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 3-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C1C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 3-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C1C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 3-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C1C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 3-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C1C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 4-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C1C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 4-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C1C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 4-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C1C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 4-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C1C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies a single-channel source image to a selected channel of a 4-channel destination image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 3-channel interleaved source image into three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 3-channel interleaved source image into three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 3-channel interleaved source image into three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 3-channel interleaved source image into three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 3-channel interleaved source image into three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_C4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 4-channel interleaved source image into four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_C4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 4-channel interleaved source image into four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_C4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 4-channel interleaved source image into four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_C4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 4-channel interleaved source image into four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_C4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Splits a 4-channel interleaved source image into four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 3-channel interleaved destination image from three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 3-channel interleaved destination image from three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 3-channel interleaved destination image from three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 3-channel interleaved destination image from three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 3-channel interleaved destination image from three separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_8u_P4C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 4-channel interleaved destination image from four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16u_P4C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 4-channel interleaved destination image from four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_16s_P4C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 4-channel interleaved destination image from four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32s_P4C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 4-channel interleaved destination image from four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Copy_32f_P4C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Composes a 4-channel interleaved destination image from four separate planes",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CopyWrapBorder_32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies image pixel values and fills the added border by wrapping (periodically repeating) the image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CopyWrapBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies image pixel values and fills the added border by wrapping (periodically repeating) the image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CopyWrapBorder_32s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies image pixel values and fills the added border by wrapping (periodically repeating) the image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CopyWrapBorder_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Copies image pixel values and fills the added border by wrapping (periodically repeating) the image",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_FilterLaplacianInit_8u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the policy structure for the Laplacian filter with border.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_FilterLaplacianInit_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the policy structure for the Laplacian filter with border.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_FilterLaplacianBorder_8u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to apply a Laplacian filter with border.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_FilterLaplacianBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Use this function to apply a Laplacian filter with border.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_FilterLaplacianRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Releases the memory of the policy structure used by the Laplacian filter with border.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_HistogramInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the policy structure for the histogram function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_HistogramUniformInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the policy structure for the histogram function.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_HistogramGetLevels",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Returns the array with level values stored in the HmppiHistogramPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Histogram_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the intensity histogram of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_HistogramRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Releases all memory in the HmppiHistogramPolicy structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_CountInRange_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the number of pixels whose values fall within the given intensity range",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Max_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the maximum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_8u_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16u_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_32f_C1MR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_8u_C3CMR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16u_C3CMR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_32f_C3CMR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Mean_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the mean of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Min_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Computes the minimum of image pixel values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderInit_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphInit",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the MorphPolicy constant structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateBorder_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Dilate3x3_64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeBorder_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Erode3x3_64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_DilateRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ErodeRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphologyBorderRelease_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for MorphologyBorder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGrayInit_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the HmppiMorphGrayPolicy constant structure for morphology gray.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGrayInit_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the HmppiMorphGrayPolicy constant structure for morphology gray.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_GrayDilateBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs gray-kernel dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_GrayDilateBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs gray-kernel dilation of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_GrayErodeBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs gray-kernel erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_GrayErodeBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs gray-kernel erosion of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphCloseBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Close an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphOpenBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Open an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphTophatBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs top-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGrayRelease_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for morphology gray.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGrayRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for morphology gray.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphBlackhatBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs black-hat operation on an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphGradientBorder_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Calculates morphological gradient of an image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructInit_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the memory structure for morphological reconstruction.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructInit_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the memory structure for morphological reconstruction.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructRelease_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for morphological reconstruction.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructRelease_32f",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Release the memory structure for morphological reconstruction.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructDilate_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructDilate_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructDilate_64f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructDilate_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructErode_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructErode_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructErode_64f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphReconstructErode_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Reconstructs an image by dilation/erosion.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphSetMode",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Sets the mask processing mode for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvInit_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_1u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphAdvRelease_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Frees the specification structure for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_MorphSetMode",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Sets the mask processing mode for advanced morphological operations.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrI420ToBGR24_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with the 4:2:0 sampling to the BGR or RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrI420ToRGB24_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with the 4:2:0 sampling to the BGR or RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LT_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “less than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GT_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of pixel values in an image buffer, using the comparison for “greater than”.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrI420ToBGR24_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with the 4:2:0 sampling to the BGR or RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrI420ToRGB24_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with the 4:2:0 sampling to the BGR or RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ResizeLinearInit_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Initializes the constant structure for linear resize.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ResizeLinear_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Resizes the source image into the destination image using the linear algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ResizeLinear_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Resizes the source image into the destination image using the linear algorithm.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ResizeLinearRelease_8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Releases the resize structure.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr_8u_AC4P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV_8u_AC4P4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV420_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the 4:2:0 YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV420_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the 4:2:0 YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV420_8u_P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the 4:2:0 YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV420_8u_C3P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the 4:2:0 YUV color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV422_8u_C3C2R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV422_8u_C3P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV422_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV422_8u_P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYUV422_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YUV422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr422_8u_C3C2R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr422_8u_C3P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_RGBToYCbCr422_8u_P3C2R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_8u32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16u8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16u8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16s8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32s8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16u8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16s8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32s8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16u8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_16s8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32s8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32f8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32f8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Scale_32f8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to another bit depth.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Scales pixel values of an image and converts them to the destination data type.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f8s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f32s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_8s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_ScaleC_64f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_8u_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16u_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_16s_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_GTVal_32f_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_8u_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16u_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_16s_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTVal_32f_C4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts an RGB image to the YCbCr422 color model image with 4:2:2 sampling format",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_LTValGTVal_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs double thresholding of image pixel values: pixels below the lower threshold or above the upper threshold are replaced with the specified values",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_C1R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_C1IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_C3IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_8u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16u_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_16s_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_Threshold_Val_32f_AC4IR",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Performs thresholding of image pixel values: pixels that satisfy the comparison condition are replaced with the specified value",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToRGB_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToRGB_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToRGB_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToRGB_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCrToRGB_8u_P3C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCr422ToRGB_8u_C2C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with 4:2:2 sampling format to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCr422ToRGB_8u_C2C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with 4:2:2 sampling format to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCr422ToRGB_8u_C2P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with 4:2:2 sampling format to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YCbCr422ToRGB_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YCbCr image with 4:2:2 sampling format to the RGB color model.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToBGR_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV420 image with 4:2:0 sampling format to the BGR image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_C2C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_P3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV422ToRGB_8u_P3C3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV422 image with 4:2:2 sampling format to the RGB image.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUVToRGB_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV image to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUVToRGB_8u_C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV image to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUVToRGB_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV image to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUVToRGB_8u_C3C4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV image to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUVToRGB_8u_AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "Converts a YUV image to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToRGB_8u_P3C3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "converts a YUV image that has 4:2:0 sampling format to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToRGB_8u_P3AC4R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "converts a YUV image that has 4:2:0 sampling format to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToRGB_8u_P3R",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "converts a YUV image that has 4:2:0 sampling format to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToRGB_8u_P3C3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "converts a YUV image that has 4:2:0 sampling format to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPI_YUV420ToRGB_8u_P3",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "图像库函数",
      "desc_en": "converts a YUV image that has 4:2:0 sampling format to the RGB color model",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_DecodeRelease",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_EncodeInit_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to encode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to encode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_EncodeRelease_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to encode amrnb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Gets the destination buffer length the user needs when decoding",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrnb_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Gets the destination buffer length the user needs when encoding",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the structure for amrwb decoder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode amrwb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode amrwb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for Amrwb Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_EncodeInit_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the structure for amrwb encoder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to encode amrwb",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_EncodeRelease_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for amrwb encoder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Amrwb_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Compute the possible length of the output buffer.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G726_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the constant structure for G726 Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G726_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Decode G726 bitStream.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G726_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Decode G726 bitStream.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G726_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for G726 Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the constant structure for G729a Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Decode G729a bitstream.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Decode G729a bitstream.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for G729a Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_EncodeInit_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the constant structure for G729a Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_EncodeRelease_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for G729a Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Get the destination buffer length needed for G729a Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_G729a_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Encode G729a bitstream.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmefr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmefr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Init the constant structure for Gsmefr Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for Gsmefr Decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Get the destination buffer length needed for Gsmefr Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to encode gsmefr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_EncodeInit_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Init the constant structure for Gsmefr Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmefr_EncodeRelease_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the constant structure for Gsmefr Encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_EncodeInit_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_EncodeRelease_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmfr_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmfr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmhr_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmhr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmhr_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmhr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmhr_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmhr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Gsmhr_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode gsmhr",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Silk_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode Silk",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Silk_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode Silk",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Silk_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode Silk",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Silk_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Use this function to decode Silk",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Evrc_DecodeInit_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Initializes the constant structure for evrc decoder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Evrc_GetDecodeDstBufLen_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Evrc audio decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Evrc_Decode_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Evrc audio decode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Evrc_DecodeRelease_8u16s",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Release the memory structure for evrc decoder.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Wav_Encode_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Wav audio encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    },
    {
      "name": "HMPPA_Wav_GetEncodeDstBufLen_16s8u",
      "library": "hmpp",
      "headerfile": "",
      "desc_cn": "音频库函数",
      "desc_en": "Wav audio encode.",
      "benefit_cn": "基于Neon指令实现性能优化",
      "benefit_en": "Performance optimized based on NEON instructions"
    }
  ],
  "Intrinsic": [
    {
      "name": "vadd_s8",
      "full name": "int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b)",
      "Intel name": "_mm_add_pi8",
      "Intel Asm": "paddb",
      "Arm Asm": "add",
      "function_en": "[vector] add [8]",
      "function_cn": "[向量]加[8]"
    },
    {
      "name": "vaddq_s8",
      "full name": "int8x16_t vaddq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_add_epi8",
      "Intel Asm": "paddw",
      "Arm Asm": "add",
      "function_en": "[vector] add [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vadd_s16",
      "full name": "int16x4_t vadd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_add_pi16",
      "Intel Asm": "paddw",
      "Arm Asm": "add",
      "function_en": "[vector] add [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddq_s16",
      "full name": "int16x8_t vaddq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_add_epi16",
      "Intel Asm": "paddw",
      "Arm Asm": "add",
      "function_en": "[vector] add [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vadd_s32",
      "full name": "int32x2_t vadd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_add_pi32",
      "Intel Asm": "paddd",
      "Arm Asm": "add",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddq_s32",
      "full name": "int32x4_t vaddq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_add_epi32",
      "Intel Asm": "paddd",
      "Arm Asm": "add",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vadd_s64",
      "full name": "int64x1_t vadd_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vaddq_s64",
      "full name": "int64x2_t vaddq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_add_epi64",
      "Intel Asm": "paddq",
      "Arm Asm": "add",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vadd_u8",
      "full name": "uint8x8_t vadd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddq_u8",
      "full name": "uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vadd_u16",
      "full name": "uint16x4_t vadd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddq_u16",
      "full name": "uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vadd_u32",
      "full name": "uint32x2_t vadd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddq_u32",
      "full name": "uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vadd_u64",
      "full name": "uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vaddq_u64",
      "full name": "uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vadd_f32",
      "full name": "float32x2_t vadd_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fadd",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddq_f32",
      "full name": "float32x4_t vaddq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_add_ps",
      "Intel Asm": "addps",
      "Arm Asm": "fadd",
      "function_en": "[vector] add [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vadd_f64",
      "full name": "float64x1_t vadd_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fadd",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vaddq_f64",
      "full name": "float64x2_t vaddq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_add_pd",
      "Intel Asm": "addpd",
      "Arm Asm": "fadd",
      "function_en": "[vector] add [64]",
      "function_cn": "[向量] 加 [64]"
    },
    {
      "name": "vaddd_s64",
      "full name": "int64_t vaddd_s64(int64_t a, int64_t b)",
      "Intel name": "_mm_add_si64",
      "Intel Asm": "paddq",
      "Arm Asm": "add",
      "function_en": "[scalar] add [64]",
      "function_cn": "[标量] 加 [64]"
    },
    {
      "name": "vaddd_u64",
      "full name": "uint64_t vaddd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "add",
      "function_en": "[scalar] add [64]",
      "function_cn": "[标量] 加 [64]"
    },
    {
      "name": "vqadd_s8",
      "full name": "int8x8_t vqadd_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_adds_pi8",
      "Intel Asm": "paddsb",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vqaddq_s8",
      "full name": "int8x16_t vqaddq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_adds_epi8",
      "Intel Asm": "paddsb",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vqadd_s16",
      "full name": "int16x4_t vqadd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_adds_pi16",
      "Intel Asm": "paddsw",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vqaddq_s16",
      "full name": "int16x8_t vqaddq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_adds_epi16",
      "Intel Asm": "paddsw",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vqadd_s32",
      "full name": "int32x2_t vqadd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vqaddq_s32",
      "full name": "int32x4_t vqaddq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vqadd_s64",
      "full name": "int64x1_t vqadd_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vqaddq_s64",
      "full name": "int64x2_t vqaddq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[vector] sqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vqadd_u8",
      "full name": "uint8x8_t vqadd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "_mm_adds_pu8",
      "Intel Asm": "paddusb",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vqaddq_u8",
      "full name": "uint8x16_t vqaddq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "_mm_adds_epu8",
      "Intel Asm": "paddusb",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vqadd_u16",
      "full name": "uint16x4_t vqadd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "_mm_adds_pu16",
      "Intel Asm": "paddusw",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vqaddq_u16",
      "full name": "uint16x8_t vqaddq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "_mm_adds_epu16",
      "Intel Asm": "paddusw",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vqadd_u32",
      "full name": "uint32x2_t vqadd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vqaddq_u32",
      "full name": "uint32x4_t vqaddq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vqadd_u64",
      "full name": "uint64x1_t vqadd_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vqaddq_u64",
      "full name": "uint64x2_t vqaddq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[vector] uqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vqaddb_s8",
      "full name": "int8_t vqaddb_s8(int8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[scalar] sqadd [8]",
      "function_cn": "[标量] 饱和加 [8]"
    },
    {
      "name": "vqaddh_s16",
      "full name": "int16_t vqaddh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[scalar] sqadd [16]",
      "function_cn": "[标量] 饱和加 [16]"
    },
    {
      "name": "vqadds_s32",
      "full name": "int32_t vqadds_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[scalar] sqadd [32]",
      "function_cn": "[标量] 饱和加 [32]"
    },
    {
      "name": "vqaddd_s64",
      "full name": "int64_t vqaddd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqadd",
      "function_en": "[scalar] sqadd [64]",
      "function_cn": "[标量] 饱和加 [64]"
    },
    {
      "name": "vqaddb_u8",
      "full name": "uint8_t vqaddb_u8(uint8_t a, uint8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[scalar] uqadd [8]",
      "function_cn": "[标量] 饱和加 [8]"
    },
    {
      "name": "vqaddh_u16",
      "full name": "uint16_t vqaddh_u16(uint16_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[scalar] uqadd [16]",
      "function_cn": "[标量] 饱和加 [16]"
    },
    {
      "name": "vqadds_u32",
      "full name": "uint32_t vqadds_u32(uint32_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[scalar] uqadd [32]",
      "function_cn": "[标量] 饱和加 [32]"
    },
    {
      "name": "vqaddd_u64",
      "full name": "uint64_t vqaddd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqadd",
      "function_en": "[scalar] uqadd [64]",
      "function_cn": "[标量] 饱和加 [64]"
    },
    {
      "name": "vuqadd_s8",
      "full name": "int8x8_t vuqadd_s8(int8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vuqaddq_s8",
      "full name": "int8x16_t vuqaddq_s8(int8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vuqadd_s16",
      "full name": "int16x4_t vuqadd_s16(int16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vuqaddq_s16",
      "full name": "int16x8_t vuqaddq_s16(int16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vuqadd_s32",
      "full name": "int32x2_t vuqadd_s32(int32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vuqaddq_s32",
      "full name": "int32x4_t vuqaddq_s32(int32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vuqadd_s64",
      "full name": "int64x1_t vuqadd_s64(int64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vuqaddq_s64",
      "full name": "int64x2_t vuqaddq_s64(int64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[vector] suqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vuqaddb_s8",
      "full name": "int8_t vuqaddb_s8(int8_t a, uint8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[scalar] suqadd [8]",
      "function_cn": "[标量] 饱和加 [8]"
    },
    {
      "name": "vuqaddh_s16",
      "full name": "int16_t vuqaddh_s16(int16_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[scalar] suqadd [16]",
      "function_cn": "[标量] 饱和加 [16]"
    },
    {
      "name": "vuqadds_s32",
      "full name": "int32_t vuqadds_s32(int32_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[scalar] suqadd [32]",
      "function_cn": "[标量] 饱和加 [32]"
    },
    {
      "name": "vuqaddd_s64",
      "full name": "int64_t vuqaddd_s64(int64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "suqadd",
      "function_en": "[scalar] suqadd [64]",
      "function_cn": "[标量] 饱和加 [64]"
    },
    {
      "name": "vsqadd_u8",
      "full name": "uint8x8_t vsqadd_u8(uint8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vsqaddq_u8",
      "full name": "uint8x16_t vsqaddq_u8(uint8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [8]",
      "function_cn": "[向量] 饱和加 [8]"
    },
    {
      "name": "vsqadd_u16",
      "full name": "uint16x4_t vsqadd_u16(uint16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vsqaddq_u16",
      "full name": "uint16x8_t vsqaddq_u16(uint16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [16]",
      "function_cn": "[向量] 饱和加 [16]"
    },
    {
      "name": "vsqadd_u32",
      "full name": "uint32x2_t vsqadd_u32(uint32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vsqaddq_u32",
      "full name": "uint32x4_t vsqaddq_u32(uint32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [32]",
      "function_cn": "[向量] 饱和加 [32]"
    },
    {
      "name": "vsqadd_u64",
      "full name": "uint64x1_t vsqadd_u64(uint64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vsqaddq_u64",
      "full name": "uint64x2_t vsqaddq_u64(uint64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[vector] usqadd [64]",
      "function_cn": "[向量] 饱和加 [64]"
    },
    {
      "name": "vsqaddb_u8",
      "full name": "uint8_t vsqaddb_u8(uint8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[scalar] usqadd [8]",
      "function_cn": "[标量] 饱和加 [8]"
    },
    {
      "name": "vsqaddh_u16",
      "full name": "uint16_t vsqaddh_u16(uint16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[scalar] usqadd [16]",
      "function_cn": "[标量] 饱和加 [16]"
    },
    {
      "name": "vsqadds_u32",
      "full name": "uint32_t vsqadds_u32(uint32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[scalar] usqadd [32]",
      "function_cn": "[标量] 饱和加 [32]"
    },
    {
      "name": "vsqaddd_u64",
      "full name": "uint64_t vsqaddd_u64(uint64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usqadd",
      "function_en": "[scalar] usqadd [64]",
      "function_cn": "[标量] 饱和加 [64]"
    },
    {
      "name": "vaddhn_s16",
      "full name": "int8x8_t vaddhn_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vaddhn_s32",
      "full name": "int16x4_t vaddhn_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vaddhn_s64",
      "full name": "int32x2_t vaddhn_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vaddhn_u16",
      "full name": "uint8x8_t vaddhn_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vaddhn_u32",
      "full name": "uint16x4_t vaddhn_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vaddhn_u64",
      "full name": "uint32x2_t vaddhn_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn",
      "function_en": "[vector] addhn [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vaddhn_high_s16",
      "full name": "int8x16_t vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vaddhn_high_s32",
      "full name": "int16x8_t vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vaddhn_high_s64",
      "full name": "int32x4_t vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vaddhn_high_u16",
      "full name": "uint8x16_t vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vaddhn_high_u32",
      "full name": "uint16x8_t vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vaddhn_high_u64",
      "full name": "uint32x4_t vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addhn2",
      "function_en": "[vector] addhn2 [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vraddhn_s16",
      "full name": "int8x8_t vraddhn_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vraddhn_s32",
      "full name": "int16x4_t vraddhn_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vraddhn_s64",
      "full name": "int32x2_t vraddhn_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vraddhn_u16",
      "full name": "uint8x8_t vraddhn_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vraddhn_u32",
      "full name": "uint16x4_t vraddhn_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vraddhn_u64",
      "full name": "uint32x2_t vraddhn_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn",
      "function_en": "[vector] raddhn [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vraddhn_high_s16",
      "full name": "int8x16_t vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vraddhn_high_s32",
      "full name": "int16x8_t vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vraddhn_high_s64",
      "full name": "int32x4_t vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vraddhn_high_u16",
      "full name": "uint8x16_t vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [16]",
      "function_cn": "[向量] 窄型加 [16]"
    },
    {
      "name": "vraddhn_high_u32",
      "full name": "uint16x8_t vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [32]",
      "function_cn": "[向量] 窄型加 [32]"
    },
    {
      "name": "vraddhn_high_u64",
      "full name": "uint32x4_t vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "raddhn2",
      "function_en": "[vector] raddhn2 [64]",
      "function_cn": "[向量] 窄型加 [64]"
    },
    {
      "name": "vaddl_s8",
      "full name": "int16x8_t vaddl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl",
      "function_en": "[vector] saddl [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddl_s16",
      "full name": "int32x4_t vaddl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl",
      "function_en": "[vector] saddl [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddl_s32",
      "full name": "int64x2_t vaddl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl",
      "function_en": "[vector] saddl [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddl_u8",
      "full name": "uint16x8_t vaddl_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl",
      "function_en": "[vector] uaddl [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddl_u16",
      "full name": "uint32x4_t vaddl_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl",
      "function_en": "[vector] uaddl [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddl_u32",
      "full name": "uint64x2_t vaddl_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl",
      "function_en": "[vector] uaddl [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddl_high_s8",
      "full name": "int16x8_t vaddl_high_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl2",
      "function_en": "[vector] saddl2 [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddl_high_s16",
      "full name": "int32x4_t vaddl_high_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl2",
      "function_en": "[vector] saddl2 [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddl_high_s32",
      "full name": "int64x2_t vaddl_high_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddl2",
      "function_en": "[vector] saddl2 [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddl_high_u8",
      "full name": "uint16x8_t vaddl_high_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl2",
      "function_en": "[vector] uaddl2 [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddl_high_u16",
      "full name": "uint32x4_t vaddl_high_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl2",
      "function_en": "[vector] uaddl2 [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddl_high_u32",
      "full name": "uint64x2_t vaddl_high_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddl2",
      "function_en": "[vector] uaddl2 [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddw_s8",
      "full name": "int16x8_t vaddw_s8(int16x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw",
      "function_en": "[vector] saddw [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddw_s16",
      "full name": "int32x4_t vaddw_s16(int32x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw",
      "function_en": "[vector] saddw [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddw_s32",
      "full name": "int64x2_t vaddw_s32(int64x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw",
      "function_en": "[vector] saddw [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddw_u8",
      "full name": "uint16x8_t vaddw_u8(uint16x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw",
      "function_en": "[vector] uaddw [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddw_u16",
      "full name": "uint32x4_t vaddw_u16(uint32x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw",
      "function_en": "[vector] uaddw [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddw_u32",
      "full name": "uint64x2_t vaddw_u32(uint64x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw",
      "function_en": "[vector] uaddw [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddw_high_s8",
      "full name": "int16x8_t vaddw_high_s8(int16x8_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw2",
      "function_en": "[vector] saddw2 [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddw_high_s16",
      "full name": "int32x4_t vaddw_high_s16(int32x4_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw2",
      "function_en": "[vector] saddw2 [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddw_high_s32",
      "full name": "int64x2_t vaddw_high_s32(int64x2_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddw2",
      "function_en": "[vector] saddw2 [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vaddw_high_u8",
      "full name": "uint16x8_t vaddw_high_u8(uint16x8_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw2",
      "function_en": "[vector] uaddw2 [8]",
      "function_cn": "[向量] 加 [8]"
    },
    {
      "name": "vaddw_high_u16",
      "full name": "uint32x4_t vaddw_high_u16(uint32x4_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw2",
      "function_en": "[vector] uaddw2 [16]",
      "function_cn": "[向量] 加 [16]"
    },
    {
      "name": "vaddw_high_u32",
      "full name": "uint64x2_t vaddw_high_u32(uint64x2_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddw2",
      "function_en": "[vector] uaddw2 [32]",
      "function_cn": "[向量] 加 [32]"
    },
    {
      "name": "vhadd_s8",
      "full name": "int8x8_t vhadd_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vhaddq_s8",
      "full name": "int8x16_t vhaddq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vhadd_s16",
      "full name": "int16x4_t vhadd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vhaddq_s16",
      "full name": "int16x8_t vhaddq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vhadd_s32",
      "full name": "int32x2_t vhadd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vhaddq_s32",
      "full name": "int32x4_t vhaddq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shadd",
      "function_en": "[vector] shadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vhadd_u8",
      "full name": "uint8x8_t vhadd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vhaddq_u8",
      "full name": "uint8x16_t vhaddq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vhadd_u16",
      "full name": "uint16x4_t vhadd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vhaddq_u16",
      "full name": "uint16x8_t vhaddq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vhadd_u32",
      "full name": "uint32x2_t vhadd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vhaddq_u32",
      "full name": "uint32x4_t vhaddq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhadd",
      "function_en": "[vector] uhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vrhadd_s8",
      "full name": "int8x8_t vrhadd_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vrhaddq_s8",
      "full name": "int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vrhadd_s16",
      "full name": "int16x4_t vrhadd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vrhaddq_s16",
      "full name": "int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vrhadd_s32",
      "full name": "int32x2_t vrhadd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vrhaddq_s32",
      "full name": "int32x4_t vrhaddq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srhadd",
      "function_en": "[vector] srhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vrhadd_u8",
      "full name": "uint8x8_t vrhadd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "_mm_avg_pu8",
      "Intel Asm": "pavgb",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vrhaddq_u8",
      "full name": "uint8x16_t vrhaddq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "_mm_avg_epu8",
      "Intel Asm": "pavgb",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [8]",
      "function_cn": "[向量] 相加并右移一位 [8]"
    },
    {
      "name": "vrhadd_u16",
      "full name": "uint16x4_t vrhadd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "_mm_avg_pu16",
      "Intel Asm": "pavgw",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vrhaddq_u16",
      "full name": "uint16x8_t vrhaddq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "_mm_avg_epu16",
      "Intel Asm": "pavgw",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [16]",
      "function_cn": "[向量] 相加并右移一位 [16]"
    },
    {
      "name": "vrhadd_u32",
      "full name": "uint32x2_t vrhadd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vrhaddq_u32",
      "full name": "uint32x4_t vrhaddq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urhadd",
      "function_en": "[vector] urhadd [32]",
      "function_cn": "[向量] 相加并右移一位 [32]"
    },
    {
      "name": "vmul_s8",
      "full name": "int8x8_t vmul_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [8]",
      "function_cn": "[向量] 乘 [8]"
    },
    {
      "name": "vmulq_s8",
      "full name": "int8x16_t vmulq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [8]",
      "function_cn": "[向量] 乘 [8]"
    },
    {
      "name": "vmul_s16",
      "full name": "int16x4_t vmul_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_mullo_pi16",
      "Intel Asm": "pmullw",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmulq_s16",
      "full name": "int16x8_t vmulq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_mullo_epi16",
      "Intel Asm": "pmullw",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmul_s32",
      "full name": "int32x2_t vmul_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulq_s32",
      "full name": "int32x4_t vmulq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_mullo_epi32",
      "Intel Asm": "pmulld",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmul_u8",
      "full name": "uint8x8_t vmul_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [8]",
      "function_cn": "[向量] 乘 [8]"
    },
    {
      "name": "vmulq_u8",
      "full name": "uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [8]",
      "function_cn": "[向量] 乘 [8]"
    },
    {
      "name": "vmul_u16",
      "full name": "uint16x4_t vmul_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmulq_u16",
      "full name": "uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmul_u32",
      "full name": "uint32x2_t vmul_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulq_u32",
      "full name": "uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmul_f32",
      "full name": "float32x2_t vmul_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulq_f32",
      "full name": "float32x4_t vmulq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_mul_ps",
      "Intel Asm": "mulps",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmul_p8",
      "full name": "poly8x8_t vmul_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmul",
      "function_en": "[vector] pmul [8]",
      "function_cn": "[向量] 多项式乘 [8]"
    },
    {
      "name": "vmulq_p8",
      "full name": "poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmul",
      "function_en": "[vector] pmul [8]",
      "function_cn": "[向量] 多项式乘 [8]"
    },
    {
      "name": "vmul_f64",
      "full name": "float64x1_t vmul_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulq_f64",
      "full name": "float64x2_t vmulq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_mul_pd",
      "Intel Asm": "mulpd",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulx_f32",
      "full name": "float32x2_t vmulx_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxq_f32",
      "full name": "float32x4_t vmulxq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulx_f64",
      "full name": "float64x1_t vmulx_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxq_f64",
      "full name": "float64x2_t vmulxq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxs_f32",
      "full name": "float32_t vmulxs_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxd_f64",
      "full name": "float64_t vmulxd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulx_lane_f32",
      "full name": "float32x2_t vmulx_lane_f32(float32x2_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxq_lane_f32",
      "full name": "float32x4_t vmulxq_lane_f32(float32x4_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulx_lane_f64",
      "full name": "float64x1_t vmulx_lane_f64(float64x1_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxq_lane_f64",
      "full name": "float64x2_t vmulxq_lane_f64(float64x2_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxs_lane_f32",
      "full name": "float32_t vmulxs_lane_f32(float32_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxd_lane_f64",
      "full name": "float64_t vmulxd_lane_f64(float64_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulx_laneq_f32",
      "full name": "float32x2_t vmulx_laneq_f32(float32x2_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxq_laneq_f32",
      "full name": "float32x4_t vmulxq_laneq_f32(float32x4_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulx_laneq_f64",
      "full name": "float64x1_t vmulx_laneq_f64(float64x1_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxq_laneq_f64",
      "full name": "float64x2_t vmulxq_laneq_f64(float64x2_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vmulxs_laneq_f32",
      "full name": "float32_t vmulxs_laneq_f32(float32_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulxd_laneq_f64",
      "full name": "float64_t vmulxd_laneq_f64(float64_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmulx",
      "function_en": "[vector] fmulx [64]",
      "function_cn": "[向量] 乘 [64]"
    },
    {
      "name": "vdiv_f32",
      "full name": "float32x2_t vdiv_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fdiv",
      "function_en": "[vector] fdiv [32]",
      "function_cn": "[向量] 除 [32]"
    },
    {
      "name": "vdivq_f32",
      "full name": "float32x4_t vdivq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_div_ps",
      "Intel Asm": "divps",
      "Arm Asm": "fdiv",
      "function_en": "[vector] fdiv [32]",
      "function_cn": "[向量] 除 [32]"
    },
    {
      "name": "vdiv_f64",
      "full name": "float64x1_t vdiv_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fdiv",
      "function_en": "[vector] fdiv [64]",
      "function_cn": "[向量] 除 [64]"
    },
    {
      "name": "vdivq_f64",
      "full name": "float64x2_t vdivq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_div_pd",
      "Intel Asm": "divpd",
      "Arm Asm": "fdiv",
      "function_en": "[vector] fdiv [64]",
      "function_cn": "[向量] 除 [64]"
    },
    {
      "name": "vmla_s8",
      "full name": "int8x8_t vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlaq_s8",
      "full name": "int8x16_t vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmla_s16",
      "full name": "int16x4_t vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlaq_s16",
      "full name": "int16x8_t vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmla_s32",
      "full name": "int32x2_t vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_s32",
      "full name": "int32x4_t vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmla_u8",
      "full name": "uint8x8_t vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlaq_u8",
      "full name": "uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmla_u16",
      "full name": "uint16x4_t vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlaq_u16",
      "full name": "uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmla_u32",
      "full name": "uint32x2_t vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_u32",
      "full name": "uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmla_f32",
      "full name": "float32x2_t vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_f32",
      "full name": "float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c)",
      "Intel name": "_mm_fmadd_ps",
      "Intel Asm": "vfmadd132ps",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmla_f64",
      "full name": "float64x1_t vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vmlaq_f64",
      "full name": "float64x2_t vmlaq_f64(float64x2_t a, float64x2_t b, float64x2_t c)",
      "Intel name": "_mm_fmadd_pd",
      "Intel Asm": "vfmadd132pd",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vmlal_s8",
      "full name": "int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlal_s16",
      "full name": "int32x4_t vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_s32",
      "full name": "int64x2_t vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_u8",
      "full name": "uint16x8_t vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlal_u16",
      "full name": "uint32x4_t vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_u32",
      "full name": "uint64x2_t vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_high_s8",
      "full name": "int16x8_t vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlal_high_s16",
      "full name": "int32x4_t vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_high_s32",
      "full name": "int64x2_t vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_high_u8",
      "full name": "uint16x8_t vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [8]",
      "function_cn": "[向量] 乘加 [8]"
    },
    {
      "name": "vmlal_high_u16",
      "full name": "uint32x4_t vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_high_u32",
      "full name": "uint64x2_t vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmls_s8",
      "full name": "int8x8_t vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsq_s8",
      "full name": "int8x16_t vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmls_s16",
      "full name": "int16x4_t vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_s16",
      "full name": "int16x8_t vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_s32",
      "full name": "int32x2_t vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_s32",
      "full name": "int32x4_t vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_u8",
      "full name": "uint8x8_t vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsq_u8",
      "full name": "uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmls_u16",
      "full name": "uint16x4_t vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_u16",
      "full name": "uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_u32",
      "full name": "uint32x2_t vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_u32",
      "full name": "uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_f32",
      "full name": "float32x2_t vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_f32",
      "full name": "float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c)",
      "Intel name": "_mm_fnmadd_ps",
      "Intel Asm": "vfnmadd132ps",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_f64",
      "full name": "float64x1_t vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vmlsq_f64",
      "full name": "float64x2_t vmlsq_f64(float64x2_t a, float64x2_t b, float64x2_t c)",
      "Intel name": "_mm_fnmadd_pd",
      "Intel Asm": "vfnmadd132pd",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vmlsl_s8",
      "full name": "int16x8_t vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsl_s16",
      "full name": "int32x4_t vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_s32",
      "full name": "int64x2_t vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_u8",
      "full name": "uint16x8_t vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsl_u16",
      "full name": "uint32x4_t vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_u32",
      "full name": "uint64x2_t vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_high_s8",
      "full name": "int16x8_t vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsl_high_s16",
      "full name": "int32x4_t vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_high_s32",
      "full name": "int64x2_t vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_high_u8",
      "full name": "uint16x8_t vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [8]",
      "function_cn": "[向量] 乘减 [8]"
    },
    {
      "name": "vmlsl_high_u16",
      "full name": "uint32x4_t vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_high_u32",
      "full name": "uint64x2_t vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfma_f32",
      "full name": "float32x2_t vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfmaq_f32",
      "full name": "float32x4_t vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c)",
      "Intel name": "_mm_fmadd_ps",
      "Intel Asm": "vfmadd132ps",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfma_f64",
      "full name": "float64x1_t vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmadd",
      "function_en": "[vector] fmadd [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfmaq_f64",
      "full name": "float64x2_t vfmaq_f64(float64x2_t a, float64x2_t b, float64x2_t c)",
      "Intel name": "_mm_fmadd_pd",
      "Intel Asm": "vfmadd132pd",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfma_lane_f32",
      "full name": "float32x2_t vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfmaq_lane_f32",
      "full name": "float32x4_t vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfma_lane_f64",
      "full name": "float64x1_t vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfmaq_lane_f64",
      "full name": "float64x2_t vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfmas_lane_f32",
      "full name": "float32_t vfmas_lane_f32(float32_t a, float32_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfmad_lane_f64",
      "full name": "float64_t vfmad_lane_f64(float64_t a, float64_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfma_laneq_f32",
      "full name": "float32x2_t vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfmaq_laneq_f32",
      "full name": "float32x4_t vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfma_laneq_f64",
      "full name": "float64x1_t vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfmaq_laneq_f64",
      "full name": "float64x2_t vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfmas_laneq_f32",
      "full name": "float32_t vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vfmad_laneq_f64",
      "full name": "float64_t vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 乘加 [64]"
    },
    {
      "name": "vfms_f32",
      "full name": "float32x2_t vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfmsq_f32",
      "full name": "float32x4_t vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c)",
      "Intel name": "_mm_fnmadd_ps",
      "Intel Asm": "vfnmadd132ps",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfms_f64",
      "full name": "float64x1_t vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmsub",
      "function_en": "[vector] fmsub [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfmsq_f64",
      "full name": "float64x2_t vfmsq_f64(float64x2_t a, float64x2_t b, float64x2_t c)",
      "Intel name": "_mm_fnmadd_pd",
      "Intel Asm": "vfnmadd132pd",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfms_lane_f32",
      "full name": "float32x2_t vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfmsq_lane_f32",
      "full name": "float32x4_t vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfms_lane_f64",
      "full name": "float64x1_t vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfmsq_lane_f64",
      "full name": "float64x2_t vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfmss_lane_f32",
      "full name": "float32_t vfmss_lane_f32(float32_t a, float32_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfmsd_lane_f64",
      "full name": "float64_t vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfms_laneq_f32",
      "full name": "float32x2_t vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfmsq_laneq_f32",
      "full name": "float32x4_t vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfms_laneq_f64",
      "full name": "float64x1_t vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfmsq_laneq_f64",
      "full name": "float64x2_t vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vfmss_laneq_f32",
      "full name": "float32_t vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vfmsd_laneq_f64",
      "full name": "float64_t vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 乘减 [64]"
    },
    {
      "name": "vqdmulh_s16",
      "full name": "int16x4_t vqdmulh_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqdmulhq_s16",
      "full name": "int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqdmulh_s32",
      "full name": "int32x2_t vqdmulh_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqdmulhq_s32",
      "full name": "int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqdmulhh_s16",
      "full name": "int16_t vqdmulhh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [16]",
      "function_cn": "[标量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqdmulhs_s32",
      "full name": "int32_t vqdmulhs_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [32]",
      "function_cn": "[标量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqrdmulh_s16",
      "full name": "int16x4_t vqrdmulh_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqrdmulhq_s16",
      "full name": "int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqrdmulh_s32",
      "full name": "int32x2_t vqrdmulh_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqrdmulhq_s32",
      "full name": "int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqrdmulhh_s16",
      "full name": "int16_t vqrdmulhh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [16]",
      "function_cn": "[标量] 饱和加倍乘 [16]"
    },
    {
      "name": "vqrdmulhs_s32",
      "full name": "int32_t vqrdmulhs_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [32]",
      "function_cn": "[标量] 饱和加倍乘 [32]"
    },
    {
      "name": "vqdmlal_s16",
      "full name": "int32x4_t vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [16]",
      "function_cn": "[向量] 饱和加倍乘加 [16]"
    },
    {
      "name": "vqdmlal_s32",
      "full name": "int64x2_t vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [32]",
      "function_cn": "[向量] 饱和加倍乘加 [32]"
    },
    {
      "name": "vqdmlalh_s16",
      "full name": "int32_t vqdmlalh_s16(int32_t a, int16_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [16]",
      "function_cn": "[标量] 饱和加倍乘加 [16]"
    },
    {
      "name": "vqdmlals_s32",
      "full name": "int64_t vqdmlals_s32(int64_t a, int32_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [32]",
      "function_cn": "[标量] 饱和加倍乘加 [32]"
    },
    {
      "name": "vqdmlal_high_s16",
      "full name": "int32x4_t vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [16]",
      "function_cn": "[向量] 饱和加倍乘加 [16]"
    },
    {
      "name": "vqdmlal_high_s32",
      "full name": "int64x2_t vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [32]",
      "function_cn": "[向量] 饱和加倍乘加 [32]"
    },
    {
      "name": "vqdmlsl_s16",
      "full name": "int32x4_t vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [16]",
      "function_cn": "[向量] 饱和加倍乘减 [16]"
    },
    {
      "name": "vqdmlsl_s32",
      "full name": "int64x2_t vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [32]",
      "function_cn": "[向量] 饱和加倍乘减 [32]"
    },
    {
      "name": "vqdmlslh_s16",
      "full name": "int32_t vqdmlslh_s16(int32_t a, int16_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [16]",
      "function_cn": "[标量] 饱和加倍乘减 [16]"
    },
    {
      "name": "vqdmlsls_s32",
      "full name": "int64_t vqdmlsls_s32(int64_t a, int32_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [32]",
      "function_cn": "[标量] 饱和加倍乘减 [32]"
    },
    {
      "name": "vqdmlsl_high_s16",
      "full name": "int32x4_t vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [16]",
      "function_cn": "[向量] 饱和加倍乘减 [16]"
    },
    {
      "name": "vqdmlsl_high_s32",
      "full name": "int64x2_t vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [32]",
      "function_cn": "[向量] 饱和加倍乘减 [32]"
    },
    {
      "name": "vmull_s8",
      "full name": "int16x8_t vmull_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vmull_s16",
      "full name": "int32x4_t vmull_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [16]",
      "function_cn": "[向量] 长型乘 [16]"
    },
    {
      "name": "vmull_s32",
      "full name": "int64x2_t vmull_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_mul_epi32",
      "Intel Asm": "pmuldq",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [32]",
      "function_cn": "[向量] 长型乘 [32]"
    },
    {
      "name": "vmull_u8",
      "full name": "uint16x8_t vmull_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vmull_u16",
      "full name": "uint32x4_t vmull_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [16]",
      "function_cn": "[向量] 长型乘 [16]"
    },
    {
      "name": "vmull_u32",
      "full name": "uint64x2_t vmull_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "_mm_mul_epu32",
      "Intel Asm": "pmuludq",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [32]",
      "function_cn": "[向量] 长型乘 [32]"
    },
    {
      "name": "vmull_p8",
      "full name": "poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmull",
      "function_en": "[vector] pmull [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vmull_high_s8",
      "full name": "int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vmull_high_s16",
      "full name": "int32x4_t vmull_high_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [16]",
      "function_cn": "[向量] 长型乘 [16]"
    },
    {
      "name": "vmull_high_s32",
      "full name": "int64x2_t vmull_high_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [32]",
      "function_cn": "[向量] 长型乘 [32]"
    },
    {
      "name": "vmull_high_u8",
      "full name": "uint16x8_t vmull_high_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vmull_high_u16",
      "full name": "uint32x4_t vmull_high_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [16]",
      "function_cn": "[向量] 长型乘 [16]"
    },
    {
      "name": "vmull_high_u32",
      "full name": "uint64x2_t vmull_high_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [32]",
      "function_cn": "[向量] 长型乘 [32]"
    },
    {
      "name": "vmull_high_p8",
      "full name": "poly16x8_t vmull_high_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmull2",
      "function_en": "[vector] pmull2 [8]",
      "function_cn": "[向量] 长型乘 [8]"
    },
    {
      "name": "vqdmull_s16",
      "full name": "int32x4_t vqdmull_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [16]",
      "function_cn": "[向量] 饱和加倍长型乘 [16]"
    },
    {
      "name": "vqdmull_s32",
      "full name": "int64x2_t vqdmull_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [32]",
      "function_cn": "[向量] 饱和加倍长型乘 [32]"
    },
    {
      "name": "vqdmullh_s16",
      "full name": "int32_t vqdmullh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [16]",
      "function_cn": "[标量] 饱和加倍长型乘 [16]"
    },
    {
      "name": "vqdmulls_s32",
      "full name": "int64_t vqdmulls_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [32]",
      "function_cn": "[标量] 饱和加倍长型乘 [32]"
    },
    {
      "name": "vqdmull_high_s16",
      "full name": "int32x4_t vqdmull_high_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [16]",
      "function_cn": "[向量] 饱和加倍长型乘 [16]"
    },
    {
      "name": "vqdmull_high_s32",
      "full name": "int64x2_t vqdmull_high_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [32]",
      "function_cn": "[向量] 饱和加倍长型乘 [32]"
    },
    {
      "name": "vsub_s8",
      "full name": "int8x8_t vsub_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_sub_pi8",
      "Intel Asm": "psubb",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubq_s8",
      "full name": "int8x16_t vsubq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_sub_epi8",
      "Intel Asm": "psubb",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsub_s16",
      "full name": "int16x4_t vsub_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_sub_pi16",
      "Intel Asm": "psubw",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubq_s16",
      "full name": "int16x8_t vsubq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_sub_epi16",
      "Intel Asm": "psubw",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsub_s32",
      "full name": "int32x2_t vsub_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_sub_pi32",
      "Intel Asm": "psubd",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubq_s32",
      "full name": "int32x4_t vsubq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_sub_epi32",
      "Intel Asm": "psubd",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsub_s64",
      "full name": "int64x1_t vsub_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsubq_s64",
      "full name": "int64x2_t vsubq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_sub_epi64",
      "Intel Asm": "psubq",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsub_u8",
      "full name": "uint8x8_t vsub_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubq_u8",
      "full name": "uint8x16_t vsubq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsub_u16",
      "full name": "uint16x4_t vsub_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubq_u16",
      "full name": "uint16x8_t vsubq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsub_u32",
      "full name": "uint32x2_t vsub_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubq_u32",
      "full name": "uint32x4_t vsubq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsub_u64",
      "full name": "uint64x1_t vsub_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsubq_u64",
      "full name": "uint64x2_t vsubq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[vector] sub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsub_f32",
      "full name": "float32x2_t vsub_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fsub",
      "function_en": "[vector] fsub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubq_f32",
      "full name": "float32x4_t vsubq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_sub_ps",
      "Intel Asm": "subps",
      "Arm Asm": "fsub",
      "function_en": "[vector] fsub [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsub_f64",
      "full name": "float64x1_t vsub_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fsub",
      "function_en": "[vector] fsub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsubq_f64",
      "full name": "float64x2_t vsubq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_sub_pd",
      "Intel Asm": "subpd",
      "Arm Asm": "fsub",
      "function_en": "[vector] fsub [64]",
      "function_cn": "[向量] 减 [64]"
    },
    {
      "name": "vsubd_s64",
      "full name": "int64_t vsubd_s64(int64_t a, int64_t b)",
      "Intel name": "_mm_sub_si64",
      "Intel Asm": "psubq",
      "Arm Asm": "sub",
      "function_en": "[scalar] sub [64]",
      "function_cn": "[标量] 减 [64]"
    },
    {
      "name": "vsubd_u64",
      "full name": "uint64_t vsubd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sub",
      "function_en": "[scalar] sub [64]",
      "function_cn": "[标量] 减 [64]"
    },
    {
      "name": "vsubl_s8",
      "full name": "int16x8_t vsubl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl",
      "function_en": "[vector] ssubl [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubl_s16",
      "full name": "int32x4_t vsubl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl",
      "function_en": "[vector] ssubl [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubl_s32",
      "full name": "int64x2_t vsubl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl",
      "function_en": "[vector] ssubl [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubl_u8",
      "full name": "uint16x8_t vsubl_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl",
      "function_en": "[vector] usubl [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubl_u16",
      "full name": "uint32x4_t vsubl_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl",
      "function_en": "[vector] usubl [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubl_u32",
      "full name": "uint64x2_t vsubl_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl",
      "function_en": "[vector] usubl [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubl_high_s8",
      "full name": "int16x8_t vsubl_high_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl2",
      "function_en": "[vector] ssubl2 [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubl_high_s16",
      "full name": "int32x4_t vsubl_high_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl2",
      "function_en": "[vector] ssubl2 [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubl_high_s32",
      "full name": "int64x2_t vsubl_high_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubl2",
      "function_en": "[vector] ssubl2 [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubl_high_u8",
      "full name": "uint16x8_t vsubl_high_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl2",
      "function_en": "[vector] usubl2 [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubl_high_u16",
      "full name": "uint32x4_t vsubl_high_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl2",
      "function_en": "[vector] usubl2 [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubl_high_u32",
      "full name": "uint64x2_t vsubl_high_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubl2",
      "function_en": "[vector] usubl2 [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubw_s8",
      "full name": "int16x8_t vsubw_s8(int16x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw",
      "function_en": "[vector] ssubw [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubw_s16",
      "full name": "int32x4_t vsubw_s16(int32x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw",
      "function_en": "[vector] ssubw [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubw_s32",
      "full name": "int64x2_t vsubw_s32(int64x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw",
      "function_en": "[vector] ssubw [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubw_u8",
      "full name": "uint16x8_t vsubw_u8(uint16x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw",
      "function_en": "[vector] usubw [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubw_u16",
      "full name": "uint32x4_t vsubw_u16(uint32x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw",
      "function_en": "[vector] usubw [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubw_u32",
      "full name": "uint64x2_t vsubw_u32(uint64x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw",
      "function_en": "[vector] usubw [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubw_high_s8",
      "full name": "int16x8_t vsubw_high_s8(int16x8_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw2",
      "function_en": "[vector] ssubw2 [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubw_high_s16",
      "full name": "int32x4_t vsubw_high_s16(int32x4_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw2",
      "function_en": "[vector] ssubw2 [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubw_high_s32",
      "full name": "int64x2_t vsubw_high_s32(int64x2_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssubw2",
      "function_en": "[vector] ssubw2 [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vsubw_high_u8",
      "full name": "uint16x8_t vsubw_high_u8(uint16x8_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw2",
      "function_en": "[vector] usubw2 [8]",
      "function_cn": "[向量] 减 [8]"
    },
    {
      "name": "vsubw_high_u16",
      "full name": "uint32x4_t vsubw_high_u16(uint32x4_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw2",
      "function_en": "[vector] usubw2 [16]",
      "function_cn": "[向量] 减 [16]"
    },
    {
      "name": "vsubw_high_u32",
      "full name": "uint64x2_t vsubw_high_u32(uint64x2_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usubw2",
      "function_en": "[vector] usubw2 [32]",
      "function_cn": "[向量] 减 [32]"
    },
    {
      "name": "vhsub_s8",
      "full name": "int8x8_t vhsub_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [8]",
      "function_cn": "[向量] 相减并右移一位 [8]"
    },
    {
      "name": "vhsubq_s8",
      "full name": "int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [8]",
      "function_cn": "[向量] 相减并右移一位 [8]"
    },
    {
      "name": "vhsub_s16",
      "full name": "int16x4_t vhsub_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [16]",
      "function_cn": "[向量] 相减并右移一位 [16]"
    },
    {
      "name": "vhsubq_s16",
      "full name": "int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [16]",
      "function_cn": "[向量] 相减并右移一位 [16]"
    },
    {
      "name": "vhsub_s32",
      "full name": "int32x2_t vhsub_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [32]",
      "function_cn": "[向量] 相减并右移一位 [32]"
    },
    {
      "name": "vhsubq_s32",
      "full name": "int32x4_t vhsubq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shsub",
      "function_en": "[vector] shsub [32]",
      "function_cn": "[向量] 相减并右移一位 [32]"
    },
    {
      "name": "vhsub_u8",
      "full name": "uint8x8_t vhsub_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [8]",
      "function_cn": "[向量] 相减并右移一位 [8]"
    },
    {
      "name": "vhsubq_u8",
      "full name": "uint8x16_t vhsubq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [8]",
      "function_cn": "[向量] 相减并右移一位 [8]"
    },
    {
      "name": "vhsub_u16",
      "full name": "uint16x4_t vhsub_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [16]",
      "function_cn": "[向量] 相减并右移一位 [16]"
    },
    {
      "name": "vhsubq_u16",
      "full name": "uint16x8_t vhsubq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [16]",
      "function_cn": "[向量] 相减并右移一位 [16]"
    },
    {
      "name": "vhsub_u32",
      "full name": "uint32x2_t vhsub_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [32]",
      "function_cn": "[向量] 相减并右移一位 [32]"
    },
    {
      "name": "vhsubq_u32",
      "full name": "uint32x4_t vhsubq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uhsub",
      "function_en": "[vector] uhsub [32]",
      "function_cn": "[向量] 相减并右移一位 [32]"
    },
    {
      "name": "vqsub_s8",
      "full name": "int8x8_t vqsub_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_subs_pi8",
      "Intel Asm": "psubsb",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [8]",
      "function_cn": "[向量] 饱和减 [8]"
    },
    {
      "name": "vqsubq_s8",
      "full name": "int8x16_t vqsubq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_subs_epi8",
      "Intel Asm": "psubsb",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [8]",
      "function_cn": "[向量] 饱和减 [8]"
    },
    {
      "name": "vqsub_s16",
      "full name": "int16x4_t vqsub_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_subs_pi16",
      "Intel Asm": "psubsw",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [16]",
      "function_cn": "[向量] 饱和减 [16]"
    },
    {
      "name": "vqsubq_s16",
      "full name": "int16x8_t vqsubq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_subs_epi16",
      "Intel Asm": "psubsw",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [16]",
      "function_cn": "[向量] 饱和减 [16]"
    },
    {
      "name": "vqsub_s32",
      "full name": "int32x2_t vqsub_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [32]",
      "function_cn": "[向量] 饱和减 [32]"
    },
    {
      "name": "vqsubq_s32",
      "full name": "int32x4_t vqsubq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [32]",
      "function_cn": "[向量] 饱和减 [32]"
    },
    {
      "name": "vqsub_s64",
      "full name": "int64x1_t vqsub_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [64]",
      "function_cn": "[向量] 饱和减 [64]"
    },
    {
      "name": "vqsubq_s64",
      "full name": "int64x2_t vqsubq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[vector] sqsub [64]",
      "function_cn": "[向量] 饱和减 [64]"
    },
    {
      "name": "vqsub_u8",
      "full name": "uint8x8_t vqsub_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "_mm_subs_pu8",
      "Intel Asm": "psubusb",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [8]",
      "function_cn": "[向量] 饱和减 [8]"
    },
    {
      "name": "vqsubq_u8",
      "full name": "uint8x16_t vqsubq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "_mm_subs_epu8",
      "Intel Asm": "psubusb",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [8]",
      "function_cn": "[向量] 饱和减 [8]"
    },
    {
      "name": "vqsub_u16",
      "full name": "uint16x4_t vqsub_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "_mm_subs_pu16",
      "Intel Asm": "psubusw",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [16]",
      "function_cn": "[向量] 饱和减 [16]"
    },
    {
      "name": "vqsubq_u16",
      "full name": "uint16x8_t vqsubq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "_mm_subs_epu16",
      "Intel Asm": "psubusw",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [16]",
      "function_cn": "[向量] 饱和减 [16]"
    },
    {
      "name": "vqsub_u32",
      "full name": "uint32x2_t vqsub_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [32]",
      "function_cn": "[向量] 饱和减 [32]"
    },
    {
      "name": "vqsubq_u32",
      "full name": "uint32x4_t vqsubq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [32]",
      "function_cn": "[向量] 饱和减 [32]"
    },
    {
      "name": "vqsub_u64",
      "full name": "uint64x1_t vqsub_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [64]",
      "function_cn": "[向量] 饱和减 [64]"
    },
    {
      "name": "vqsubq_u64",
      "full name": "uint64x2_t vqsubq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[vector] uqsub [64]",
      "function_cn": "[向量] 饱和减 [64]"
    },
    {
      "name": "vqsubb_s8",
      "full name": "int8_t vqsubb_s8(int8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[scalar] sqsub [8]",
      "function_cn": "[标量] 饱和减 [8]"
    },
    {
      "name": "vqsubh_s16",
      "full name": "int16_t vqsubh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[scalar] sqsub [16]",
      "function_cn": "[标量] 饱和减 [16]"
    },
    {
      "name": "vqsubs_s32",
      "full name": "int32_t vqsubs_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[scalar] sqsub [32]",
      "function_cn": "[标量] 饱和减 [32]"
    },
    {
      "name": "vqsubd_s64",
      "full name": "int64_t vqsubd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqsub",
      "function_en": "[scalar] sqsub [64]",
      "function_cn": "[标量] 饱和减 [64]"
    },
    {
      "name": "vqsubb_u8",
      "full name": "uint8_t vqsubb_u8(uint8_t a, uint8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[scalar] uqsub [8]",
      "function_cn": "[标量] 饱和减 [8]"
    },
    {
      "name": "vqsubh_u16",
      "full name": "uint16_t vqsubh_u16(uint16_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[scalar] uqsub [16]",
      "function_cn": "[标量] 饱和减 [16]"
    },
    {
      "name": "vqsubs_u32",
      "full name": "uint32_t vqsubs_u32(uint32_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[scalar] uqsub [32]",
      "function_cn": "[标量] 饱和减 [32]"
    },
    {
      "name": "vqsubd_u64",
      "full name": "uint64_t vqsubd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqsub",
      "function_en": "[scalar] uqsub [64]",
      "function_cn": "[标量] 饱和减 [64]"
    },
    {
      "name": "vsubhn_s16",
      "full name": "int8x8_t vsubhn_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [16]",
      "function_cn": "[向量] 窄型减 [16]"
    },
    {
      "name": "vsubhn_s32",
      "full name": "int16x4_t vsubhn_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [32]",
      "function_cn": "[向量] 窄型减 [32]"
    },
    {
      "name": "vsubhn_s64",
      "full name": "int32x2_t vsubhn_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [64]",
      "function_cn": "[向量] 窄型减 [64]"
    },
    {
      "name": "vsubhn_u16",
      "full name": "uint8x8_t vsubhn_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [16]",
      "function_cn": "[向量] 窄型减 [16]"
    },
    {
      "name": "vsubhn_u32",
      "full name": "uint16x4_t vsubhn_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [32]",
      "function_cn": "[向量] 窄型减 [32]"
    },
    {
      "name": "vsubhn_u64",
      "full name": "uint32x2_t vsubhn_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn",
      "function_en": "[vector] subhn [64]",
      "function_cn": "[向量] 窄型减 [64]"
    },
    {
      "name": "vsubhn_high_s16",
      "full name": "int8x16_t vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [16]",
      "function_cn": "[向量] 窄型减 [16]"
    },
    {
      "name": "vsubhn_high_s32",
      "full name": "int16x8_t vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [32]",
      "function_cn": "[向量] 窄型减 [32]"
    },
    {
      "name": "vsubhn_high_s64",
      "full name": "int32x4_t vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [64]",
      "function_cn": "[向量] 窄型减 [64]"
    },
    {
      "name": "vsubhn_high_u16",
      "full name": "uint8x16_t vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [16]",
      "function_cn": "[向量] 窄型减 [16]"
    },
    {
      "name": "vsubhn_high_u32",
      "full name": "uint16x8_t vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [32]",
      "function_cn": "[向量] 窄型减 [32]"
    },
    {
      "name": "vsubhn_high_u64",
      "full name": "uint32x4_t vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "subhn2",
      "function_en": "[vector] subhn2 [64]",
      "function_cn": "[向量] 窄型减 [64]"
    },
    {
      "name": "vrsubhn_s16",
      "full name": "int8x8_t vrsubhn_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [16]",
      "function_cn": "[向量] 舍入窄型减 [16]"
    },
    {
      "name": "vrsubhn_s32",
      "full name": "int16x4_t vrsubhn_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [32]",
      "function_cn": "[向量] 舍入窄型减 [32]"
    },
    {
      "name": "vrsubhn_s64",
      "full name": "int32x2_t vrsubhn_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [64]",
      "function_cn": "[向量] 舍入窄型减 [64]"
    },
    {
      "name": "vrsubhn_u16",
      "full name": "uint8x8_t vrsubhn_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [16]",
      "function_cn": "[向量] 舍入窄型减 [16]"
    },
    {
      "name": "vrsubhn_u32",
      "full name": "uint16x4_t vrsubhn_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [32]",
      "function_cn": "[向量] 舍入窄型减 [32]"
    },
    {
      "name": "vrsubhn_u64",
      "full name": "uint32x2_t vrsubhn_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn",
      "function_en": "[vector] rsubhn [64]",
      "function_cn": "[向量] 舍入窄型减 [64]"
    },
    {
      "name": "vrsubhn_high_s16",
      "full name": "int8x16_t vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [16]",
      "function_cn": "[向量] 舍入窄型减 [16]"
    },
    {
      "name": "vrsubhn_high_s32",
      "full name": "int16x8_t vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [32]",
      "function_cn": "[向量] 舍入窄型减 [32]"
    },
    {
      "name": "vrsubhn_high_s64",
      "full name": "int32x4_t vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [64]",
      "function_cn": "[向量] 舍入窄型减 [64]"
    },
    {
      "name": "vrsubhn_high_u16",
      "full name": "uint8x16_t vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [16]",
      "function_cn": "[向量] 舍入窄型减 [16]"
    },
    {
      "name": "vrsubhn_high_u32",
      "full name": "uint16x8_t vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [32]",
      "function_cn": "[向量] 舍入窄型减 [32]"
    },
    {
      "name": "vrsubhn_high_u64",
      "full name": "uint32x4_t vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rsubhn2",
      "function_en": "[vector] rsubhn2 [64]",
      "function_cn": "[向量] 舍入窄型减 [64]"
    },
    {
      "name": "vceq_s8",
      "full name": "uint8x8_t vceq_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_cmpeq_pi8",
      "Intel Asm": "pcmpeqb",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceqq_s8",
      "full name": "uint8x16_t vceqq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_cmpeq_epi8",
      "Intel Asm": "pcmpeqb",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceq_s16",
      "full name": "uint16x4_t vceq_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_cmpeq_pi16",
      "Intel Asm": "pcmpeqw",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较等于 [16]"
    },
    {
      "name": "vceqq_s16",
      "full name": "uint16x8_t vceqq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_cmpeq_epi16",
      "Intel Asm": "pcmpeqw",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较等于 [16]"
    },
    {
      "name": "vceq_s32",
      "full name": "uint32x2_t vceq_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_cmpeq_pi32",
      "Intel Asm": "pcmpeqd",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceqq_s32",
      "full name": "uint32x4_t vceqq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_cmpeq_epi32",
      "Intel Asm": "pcmpeqd",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceq_u8",
      "full name": "uint8x8_t vceq_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceqq_u8",
      "full name": "uint8x16_t vceqq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceq_u16",
      "full name": "uint16x4_t vceq_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较等于 [16]"
    },
    {
      "name": "vceqq_u16",
      "full name": "uint16x8_t vceqq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较等于 [16]"
    },
    {
      "name": "vceq_u32",
      "full name": "uint32x2_t vceq_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceqq_u32",
      "full name": "uint32x4_t vceqq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceq_f32",
      "full name": "uint32x2_t vceq_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceqq_f32",
      "full name": "uint32x4_t vceqq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_cmpeq_ps",
      "Intel Asm": "cmpps",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [32]",
      "function_cn": "[向量] 比较等于 [32]"
    },
    {
      "name": "vceq_p8",
      "full name": "uint8x8_t vceq_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceqq_p8",
      "full name": "uint8x16_t vceqq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较等于 [8]"
    },
    {
      "name": "vceq_s64",
      "full name": "uint64x1_t vceq_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceqq_s64",
      "full name": "uint64x2_t vceqq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_cmpeq_epi64",
      "Intel Asm": "pcmpeqq",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceq_u64",
      "full name": "uint64x1_t vceq_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceqq_u64",
      "full name": "uint64x2_t vceqq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceq_p64",
      "full name": "uint64x1_t vceq_p64(poly64x1_t a, poly64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceqq_p64",
      "full name": "uint64x2_t vceqq_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceq_f64",
      "full name": "uint64x1_t vceq_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceqq_f64",
      "full name": "uint64x2_t vceqq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_cmpeq_pd",
      "Intel Asm": "cmppd",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [64]",
      "function_cn": "[向量] 比较等于 [64]"
    },
    {
      "name": "vceqd_s64",
      "full name": "uint64_t vceqd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[scalar] cmeq [64]",
      "function_cn": "[标量] 比较等于 [64]"
    },
    {
      "name": "vceqd_u64",
      "full name": "uint64_t vceqd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[scalar] cmeq [64]",
      "function_cn": "[标量] 比较等于 [64]"
    },
    {
      "name": "vceqs_f32",
      "full name": "uint32_t vceqs_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[scalar] fcmeq [32]",
      "function_cn": "[标量] 比较等于 [32]"
    },
    {
      "name": "vceqd_f64",
      "full name": "uint64_t vceqd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[scalar] fcmeq [64]",
      "function_cn": "[标量] 比较等于 [64]"
    },
    {
      "name": "vceqz_s8",
      "full name": "uint8x8_t vceqz_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqzq_s8",
      "full name": "uint8x16_t vceqzq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqz_s16",
      "full name": "uint16x4_t vceqz_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较是否等于零寄存器 [16]"
    },
    {
      "name": "vceqzq_s16",
      "full name": "uint16x8_t vceqzq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较是否等于零寄存器 [16]"
    },
    {
      "name": "vceqz_s32",
      "full name": "uint32x2_t vceqz_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqzq_s32",
      "full name": "uint32x4_t vceqzq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqz_u8",
      "full name": "uint8x8_t vceqz_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqzq_u8",
      "full name": "uint8x16_t vceqzq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqz_u16",
      "full name": "uint16x4_t vceqz_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较是否等于零寄存器 [16]"
    },
    {
      "name": "vceqzq_u16",
      "full name": "uint16x8_t vceqzq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [16]",
      "function_cn": "[向量] 比较是否等于零寄存器 [16]"
    },
    {
      "name": "vceqz_u32",
      "full name": "uint32x2_t vceqz_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqzq_u32",
      "full name": "uint32x4_t vceqzq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqz_f32",
      "full name": "uint32x2_t vceqz_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqzq_f32",
      "full name": "uint32x4_t vceqzq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [32]",
      "function_cn": "[向量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqz_p8",
      "full name": "uint8x8_t vceqz_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqzq_p8",
      "full name": "uint8x16_t vceqzq_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [8]",
      "function_cn": "[向量] 比较是否等于零寄存器 [8]"
    },
    {
      "name": "vceqz_s64",
      "full name": "uint64x1_t vceqz_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzq_s64",
      "full name": "uint64x2_t vceqzq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqz_u64",
      "full name": "uint64x1_t vceqz_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzq_u64",
      "full name": "uint64x2_t vceqzq_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqz_p64",
      "full name": "uint64x1_t vceqz_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzq_p64",
      "full name": "uint64x2_t vceqzq_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[vector] cmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqz_f64",
      "full name": "uint64x1_t vceqz_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzq_f64",
      "full name": "uint64x2_t vceqzq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[vector] fcmeq [64]",
      "function_cn": "[向量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzd_s64",
      "full name": "uint64_t vceqzd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[scalar] cmeq [64]",
      "function_cn": "[标量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzd_u64",
      "full name": "uint64_t vceqzd_u64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmeq",
      "function_en": "[scalar] cmeq [64]",
      "function_cn": "[标量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vceqzs_f32",
      "full name": "uint32_t vceqzs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[scalar] fcmeq [32]",
      "function_cn": "[标量] 比较是否等于零寄存器 [32]"
    },
    {
      "name": "vceqzd_f64",
      "full name": "uint64_t vceqzd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmeq",
      "function_en": "[scalar] fcmeq [64]",
      "function_cn": "[标量] 比较是否等于零寄存器 [64]"
    },
    {
      "name": "vcge_s8",
      "full name": "uint8x8_t vcge_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较大于等于 [8]"
    },
    {
      "name": "vcgeq_s8",
      "full name": "uint8x16_t vcgeq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较大于等于 [8]"
    },
    {
      "name": "vcge_s16",
      "full name": "uint16x4_t vcge_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较大于等于 [16]"
    },
    {
      "name": "vcgeq_s16",
      "full name": "uint16x8_t vcgeq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较大于等于 [16]"
    },
    {
      "name": "vcge_s32",
      "full name": "uint32x2_t vcge_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcgeq_s32",
      "full name": "uint32x4_t vcgeq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcge_u8",
      "full name": "uint8x8_t vcge_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [8]",
      "function_cn": "[向量] 比较大于等于 [8]"
    },
    {
      "name": "vcgeq_u8",
      "full name": "uint8x16_t vcgeq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [8]",
      "function_cn": "[向量] 比较大于等于 [8]"
    },
    {
      "name": "vcge_u16",
      "full name": "uint16x4_t vcge_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [16]",
      "function_cn": "[向量] 比较大于等于 [16]"
    },
    {
      "name": "vcgeq_u16",
      "full name": "uint16x8_t vcgeq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [16]",
      "function_cn": "[向量] 比较大于等于 [16]"
    },
    {
      "name": "vcge_u32",
      "full name": "uint32x2_t vcge_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcgeq_u32",
      "full name": "uint32x4_t vcgeq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcge_f32",
      "full name": "uint32x2_t vcge_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcgeq_f32",
      "full name": "uint32x4_t vcgeq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_cmpge_ps",
      "Intel Asm": "cmpps",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较大于等于 [32]"
    },
    {
      "name": "vcge_s64",
      "full name": "uint64x1_t vcge_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcgeq_s64",
      "full name": "uint64x2_t vcgeq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcge_u64",
      "full name": "uint64x1_t vcge_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcgeq_u64",
      "full name": "uint64x2_t vcgeq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcge_f64",
      "full name": "uint64x1_t vcge_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcgeq_f64",
      "full name": "uint64x2_t vcgeq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_cmpge_pd",
      "Intel Asm": "cmppd",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较大于等于 [64]"
    },
    {
      "name": "vcged_s64",
      "full name": "uint64_t vcged_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[scalar] cmge [64]",
      "function_cn": "[标量] 比较大于等于 [64]"
    },
    {
      "name": "vcged_u64",
      "full name": "uint64_t vcged_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[scalar] cmhs [64]",
      "function_cn": "[标量] 比较大于等于 [64]"
    },
    {
      "name": "vcges_f32",
      "full name": "uint32_t vcges_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [32]",
      "function_cn": "[标量] 比较大于等于 [32]"
    },
    {
      "name": "vcged_f64",
      "full name": "uint64_t vcged_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [64]",
      "function_cn": "[标量] 比较大于等于 [64]"
    },
    {
      "name": "vcgez_s8",
      "full name": "uint8x8_t vcgez_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较大于等于零寄存器 [8]"
    },
    {
      "name": "vcgezq_s8",
      "full name": "uint8x16_t vcgezq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较大于等于零寄存器 [8]"
    },
    {
      "name": "vcgez_s16",
      "full name": "uint16x4_t vcgez_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较大于等于零寄存器 [16]"
    },
    {
      "name": "vcgezq_s16",
      "full name": "uint16x8_t vcgezq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较大于等于零寄存器 [16]"
    },
    {
      "name": "vcgez_s32",
      "full name": "uint32x2_t vcgez_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较大于等于零寄存器 [32]"
    },
    {
      "name": "vcgezq_s32",
      "full name": "uint32x4_t vcgezq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较大于等于零寄存器 [32]"
    },
    {
      "name": "vcgez_s64",
      "full name": "uint64x1_t vcgez_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcgezq_s64",
      "full name": "uint64x2_t vcgezq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcgez_f32",
      "full name": "uint32x2_t vcgez_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较大于等于零寄存器 [32]"
    },
    {
      "name": "vcgezq_f32",
      "full name": "uint32x4_t vcgezq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较大于等于零寄存器 [32]"
    },
    {
      "name": "vcgez_f64",
      "full name": "uint64x1_t vcgez_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcgezq_f64",
      "full name": "uint64x2_t vcgezq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcgezd_s64",
      "full name": "uint64_t vcgezd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[scalar] cmge [64]",
      "function_cn": "[标量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcgezs_f32",
      "full name": "uint32_t vcgezs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [32]",
      "function_cn": "[标量] 比较大于等于零寄存器 [32]"
    },
    {
      "name": "vcgezd_f64",
      "full name": "uint64_t vcgezd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [64]",
      "function_cn": "[标量] 比较大于等于零寄存器 [64]"
    },
    {
      "name": "vcle_s8",
      "full name": "uint8x8_t vcle_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较小于等于 [8]"
    },
    {
      "name": "vcleq_s8",
      "full name": "uint8x16_t vcleq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [8]",
      "function_cn": "[向量] 比较小于等于 [8]"
    },
    {
      "name": "vcle_s16",
      "full name": "uint16x4_t vcle_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较小于等于 [16]"
    },
    {
      "name": "vcleq_s16",
      "full name": "uint16x8_t vcleq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [16]",
      "function_cn": "[向量] 比较小于等于 [16]"
    },
    {
      "name": "vcle_s32",
      "full name": "uint32x2_t vcle_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcleq_s32",
      "full name": "uint32x4_t vcleq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcle_u8",
      "full name": "uint8x8_t vcle_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [8]",
      "function_cn": "[向量] 比较小于等于 [8]"
    },
    {
      "name": "vcleq_u8",
      "full name": "uint8x16_t vcleq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [8]",
      "function_cn": "[向量] 比较小于等于 [8]"
    },
    {
      "name": "vcle_u16",
      "full name": "uint16x4_t vcle_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [16]",
      "function_cn": "[向量] 比较小于等于 [16]"
    },
    {
      "name": "vcleq_u16",
      "full name": "uint16x8_t vcleq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [16]",
      "function_cn": "[向量] 比较小于等于 [16]"
    },
    {
      "name": "vcle_u32",
      "full name": "uint32x2_t vcle_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcleq_u32",
      "full name": "uint32x4_t vcleq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcle_f32",
      "full name": "uint32x2_t vcle_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcleq_f32",
      "full name": "uint32x4_t vcleq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_cmple_ps",
      "Intel Asm": "cmpps",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [32]",
      "function_cn": "[向量] 比较小于等于 [32]"
    },
    {
      "name": "vcle_s64",
      "full name": "uint64x1_t vcle_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcleq_s64",
      "full name": "uint64x2_t vcleq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[vector] cmge [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcle_u64",
      "full name": "uint64x1_t vcle_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcleq_u64",
      "full name": "uint64x2_t vcleq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[vector] cmhs [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcle_f64",
      "full name": "uint64x1_t vcle_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcleq_f64",
      "full name": "uint64x2_t vcleq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_cmple_pd",
      "Intel Asm": "cmppd",
      "Arm Asm": "fcmge",
      "function_en": "[vector] fcmge [64]",
      "function_cn": "[向量] 比较小于等于 [64]"
    },
    {
      "name": "vcled_s64",
      "full name": "uint64_t vcled_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmge",
      "function_en": "[scalar] cmge [64]",
      "function_cn": "[标量] 比较小于等于 [64]"
    },
    {
      "name": "vcled_u64",
      "full name": "uint64_t vcled_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhs",
      "function_en": "[scalar] cmhs [64]",
      "function_cn": "[标量] 比较小于等于 [64]"
    },
    {
      "name": "vcles_f32",
      "full name": "uint32_t vcles_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [32]",
      "function_cn": "[标量] 比较小于等于 [32]"
    },
    {
      "name": "vcled_f64",
      "full name": "uint64_t vcled_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmge",
      "function_en": "[scalar] fcmge [64]",
      "function_cn": "[标量] 比较小于等于 [64]"
    },
    {
      "name": "vclez_s8",
      "full name": "uint8x8_t vclez_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [8]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [8]"
    },
    {
      "name": "vclezq_s8",
      "full name": "uint8x16_t vclezq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [8]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [8]"
    },
    {
      "name": "vclez_s16",
      "full name": "uint16x4_t vclez_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [16]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [16]"
    },
    {
      "name": "vclezq_s16",
      "full name": "uint16x8_t vclezq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [16]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [16]"
    },
    {
      "name": "vclez_s32",
      "full name": "uint32x2_t vclez_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [32]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [32]"
    },
    {
      "name": "vclezq_s32",
      "full name": "uint32x4_t vclezq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [32]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [32]"
    },
    {
      "name": "vclez_s64",
      "full name": "uint64x1_t vclez_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [64]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vclezq_s64",
      "full name": "uint64x2_t vclezq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[vector] cmle [64]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vclez_f32",
      "full name": "uint32x2_t vclez_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[vector] fcmle [32]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [32]"
    },
    {
      "name": "vclezq_f32",
      "full name": "uint32x4_t vclezq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[vector] fcmle [32]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [32]"
    },
    {
      "name": "vclez_f64",
      "full name": "uint64x1_t vclez_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[vector] fcmle [64]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vclezq_f64",
      "full name": "uint64x2_t vclezq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[vector] fcmle [64]",
      "function_cn": "[向量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vclezd_s64",
      "full name": "uint64_t vclezd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmle",
      "function_en": "[scalar] cmle [64]",
      "function_cn": "[标量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vclezs_f32",
      "full name": "uint32_t vclezs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[scalar] fcmle [32]",
      "function_cn": "[标量] 比较是否小于等于零寄存器 [32]"
    },
    {
      "name": "vclezd_f64",
      "full name": "uint64_t vclezd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmle",
      "function_en": "[scalar] fcmle [64]",
      "function_cn": "[标量] 比较是否小于等于零寄存器 [64]"
    },
    {
      "name": "vcgt_s8",
      "full name": "uint8x8_t vcgt_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_cmpgt_pi8",
      "Intel Asm": "pcmpgtb",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较大于 [8]"
    },
    {
      "name": "vcgtq_s8",
      "full name": "uint8x16_t vcgtq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_cmpgt_epi8",
      "Intel Asm": "pcmpgtb",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较大于 [8]"
    },
    {
      "name": "vcgt_s16",
      "full name": "uint16x4_t vcgt_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_cmpgt_pi16",
      "Intel Asm": "pcmpgtw",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较大于 [16]"
    },
    {
      "name": "vcgtq_s16",
      "full name": "uint16x8_t vcgtq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_cmpgt_epi16",
      "Intel Asm": "pcmpgtw",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较大于 [16]"
    },
    {
      "name": "vcgt_s32",
      "full name": "uint32x2_t vcgt_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_cmpgt_pi32",
      "Intel Asm": "pcmpgtd",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgtq_s32",
      "full name": "uint32x4_t vcgtq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_cmpgt_epi32",
      "Intel Asm": "pcmpgtd",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgt_u8",
      "full name": "uint8x8_t vcgt_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [8]",
      "function_cn": "[向量] 比较大于 [8]"
    },
    {
      "name": "vcgtq_u8",
      "full name": "uint8x16_t vcgtq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [8]",
      "function_cn": "[向量] 比较大于 [8]"
    },
    {
      "name": "vcgt_u16",
      "full name": "uint16x4_t vcgt_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [16]",
      "function_cn": "[向量] 比较大于 [16]"
    },
    {
      "name": "vcgtq_u16",
      "full name": "uint16x8_t vcgtq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [16]",
      "function_cn": "[向量] 比较大于 [16]"
    },
    {
      "name": "vcgt_u32",
      "full name": "uint32x2_t vcgt_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgtq_u32",
      "full name": "uint32x4_t vcgtq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgt_f32",
      "full name": "uint32x2_t vcgt_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgtq_f32",
      "full name": "uint32x4_t vcgtq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_cmpgt_ps",
      "Intel Asm": "cmpps",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较大于 [32]"
    },
    {
      "name": "vcgt_s64",
      "full name": "uint64x1_t vcgt_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgtq_s64",
      "full name": "uint64x2_t vcgtq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_cmpgt_epi64",
      "Intel Asm": "pcmpgtq",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgt_u64",
      "full name": "uint64x1_t vcgt_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgtq_u64",
      "full name": "uint64x2_t vcgtq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgt_f64",
      "full name": "uint64x1_t vcgt_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgtq_f64",
      "full name": "uint64x2_t vcgtq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_cmpgt_pd",
      "Intel Asm": "cmppd",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较大于 [64]"
    },
    {
      "name": "vcgtd_s64",
      "full name": "uint64_t vcgtd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[scalar] cmgt [64]",
      "function_cn": "[标量] 比较大于 [64]"
    },
    {
      "name": "vcgtd_u64",
      "full name": "uint64_t vcgtd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[scalar] cmhi [64]",
      "function_cn": "[标量] 比较大于 [64]"
    },
    {
      "name": "vcgts_f32",
      "full name": "uint32_t vcgts_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [32]",
      "function_cn": "[标量] 比较大于 [32]"
    },
    {
      "name": "vcgtd_f64",
      "full name": "uint64_t vcgtd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [64]",
      "function_cn": "[标量] 比较大于 [64]"
    },
    {
      "name": "vcgtz_s8",
      "full name": "uint8x8_t vcgtz_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较是否大于零寄存器 [8]"
    },
    {
      "name": "vcgtzq_s8",
      "full name": "uint8x16_t vcgtzq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较是否大于零寄存器 [8]"
    },
    {
      "name": "vcgtz_s16",
      "full name": "uint16x4_t vcgtz_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较是否大于零寄存器 [16]"
    },
    {
      "name": "vcgtzq_s16",
      "full name": "uint16x8_t vcgtzq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较是否大于零寄存器 [16]"
    },
    {
      "name": "vcgtz_s32",
      "full name": "uint32x2_t vcgtz_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较是否大于零寄存器 [32]"
    },
    {
      "name": "vcgtzq_s32",
      "full name": "uint32x4_t vcgtzq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较是否大于零寄存器 [32]"
    },
    {
      "name": "vcgtz_s64",
      "full name": "uint64x1_t vcgtz_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vcgtzq_s64",
      "full name": "uint64x2_t vcgtzq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vcgtz_f32",
      "full name": "uint32x2_t vcgtz_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较是否大于零寄存器 [32]"
    },
    {
      "name": "vcgtzq_f32",
      "full name": "uint32x4_t vcgtzq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较是否大于零寄存器 [32]"
    },
    {
      "name": "vcgtz_f64",
      "full name": "uint64x1_t vcgtz_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vcgtzq_f64",
      "full name": "uint64x2_t vcgtzq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vcgtzd_s64",
      "full name": "uint64_t vcgtzd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[scalar] cmgt [64]",
      "function_cn": "[标量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vcgtzs_f32",
      "full name": "uint32_t vcgtzs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [32]",
      "function_cn": "[标量] 比较是否大于零寄存器 [32]"
    },
    {
      "name": "vcgtzd_f64",
      "full name": "uint64_t vcgtzd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [64]",
      "function_cn": "[标量] 比较是否大于零寄存器 [64]"
    },
    {
      "name": "vclt_s8",
      "full name": "uint8x8_t vclt_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较小于 [8]"
    },
    {
      "name": "vcltq_s8",
      "full name": "uint8x16_t vcltq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_cmplt_epi8",
      "Intel Asm": "pcmpgtb",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [8]",
      "function_cn": "[向量] 比较小于 [8]"
    },
    {
      "name": "vclt_s16",
      "full name": "uint16x4_t vclt_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较小于 [16]"
    },
    {
      "name": "vcltq_s16",
      "full name": "uint16x8_t vcltq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_cmplt_epi16",
      "Intel Asm": "pcmpgtw",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [16]",
      "function_cn": "[向量] 比较小于 [16]"
    },
    {
      "name": "vclt_s32",
      "full name": "uint32x2_t vclt_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vcltq_s32",
      "full name": "uint32x4_t vcltq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_cmplt_epi32",
      "Intel Asm": "pcmpgtd",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vclt_u8",
      "full name": "uint8x8_t vclt_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [8]",
      "function_cn": "[向量] 比较小于 [8]"
    },
    {
      "name": "vcltq_u8",
      "full name": "uint8x16_t vcltq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [8]",
      "function_cn": "[向量] 比较小于 [8]"
    },
    {
      "name": "vclt_u16",
      "full name": "uint16x4_t vclt_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [16]",
      "function_cn": "[向量] 比较小于 [16]"
    },
    {
      "name": "vcltq_u16",
      "full name": "uint16x8_t vcltq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [16]",
      "function_cn": "[向量] 比较小于 [16]"
    },
    {
      "name": "vclt_u32",
      "full name": "uint32x2_t vclt_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vcltq_u32",
      "full name": "uint32x4_t vcltq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vclt_f32",
      "full name": "uint32x2_t vclt_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vcltq_f32",
      "full name": "uint32x4_t vcltq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_cmplt_ps",
      "Intel Asm": "cmpps",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [32]",
      "function_cn": "[向量] 比较小于 [32]"
    },
    {
      "name": "vclt_s64",
      "full name": "uint64x1_t vclt_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vcltq_s64",
      "full name": "uint64x2_t vcltq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[vector] cmgt [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vclt_u64",
      "full name": "uint64x1_t vclt_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vcltq_u64",
      "full name": "uint64x2_t vcltq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[vector] cmhi [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vclt_f64",
      "full name": "uint64x1_t vclt_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vcltq_f64",
      "full name": "uint64x2_t vcltq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_cmplt_pd",
      "Intel Asm": "cmppd",
      "Arm Asm": "fcmgt",
      "function_en": "[vector] fcmgt [64]",
      "function_cn": "[向量] 比较小于 [64]"
    },
    {
      "name": "vcltd_s64",
      "full name": "uint64_t vcltd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmgt",
      "function_en": "[scalar] cmgt [64]",
      "function_cn": "[标量] 比较小于 [64]"
    },
    {
      "name": "vcltd_u64",
      "full name": "uint64_t vcltd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmhi",
      "function_en": "[scalar] cmhi [64]",
      "function_cn": "[标量] 比较小于 [64]"
    },
    {
      "name": "vclts_f32",
      "full name": "uint32_t vclts_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [32]",
      "function_cn": "[标量] 比较小于 [32]"
    },
    {
      "name": "vcltd_f64",
      "full name": "uint64_t vcltd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmgt",
      "function_en": "[scalar] fcmgt [64]",
      "function_cn": "[标量] 比较小于 [64]"
    },
    {
      "name": "vcltz_s8",
      "full name": "uint8x8_t vcltz_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [8]",
      "function_cn": "[向量] 比较是否小于零寄存器 [8]"
    },
    {
      "name": "vcltzq_s8",
      "full name": "uint8x16_t vcltzq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [8]",
      "function_cn": "[向量] 比较是否小于零寄存器 [8]"
    },
    {
      "name": "vcltz_s16",
      "full name": "uint16x4_t vcltz_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [16]",
      "function_cn": "[向量] 比较是否小于零寄存器 [16]"
    },
    {
      "name": "vcltzq_s16",
      "full name": "uint16x8_t vcltzq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [16]",
      "function_cn": "[向量] 比较是否小于零寄存器 [16]"
    },
    {
      "name": "vcltz_s32",
      "full name": "uint32x2_t vcltz_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [32]",
      "function_cn": "[向量] 比较是否小于零寄存器 [32]"
    },
    {
      "name": "vcltzq_s32",
      "full name": "uint32x4_t vcltzq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [32]",
      "function_cn": "[向量] 比较是否小于零寄存器 [32]"
    },
    {
      "name": "vcltz_s64",
      "full name": "uint64x1_t vcltz_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [64]",
      "function_cn": "[向量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcltzq_s64",
      "full name": "uint64x2_t vcltzq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[vector] cmlt [64]",
      "function_cn": "[向量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcltz_f32",
      "full name": "uint32x2_t vcltz_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[vector] fcmlt [32]",
      "function_cn": "[向量] 比较是否小于零寄存器 [32]"
    },
    {
      "name": "vcltzq_f32",
      "full name": "uint32x4_t vcltzq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[vector] fcmlt [32]",
      "function_cn": "[向量] 比较是否小于零寄存器 [32]"
    },
    {
      "name": "vcltz_f64",
      "full name": "uint64x1_t vcltz_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[vector] fcmlt [64]",
      "function_cn": "[向量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcltzq_f64",
      "full name": "uint64x2_t vcltzq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[vector] fcmlt [64]",
      "function_cn": "[向量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcltzd_s64",
      "full name": "uint64_t vcltzd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmlt",
      "function_en": "[scalar] cmlt [64]",
      "function_cn": "[标量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcltzs_f32",
      "full name": "uint32_t vcltzs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[scalar] fcmlt [32]",
      "function_cn": "[标量] 比较是否小于零寄存器 [32]"
    },
    {
      "name": "vcltzd_f64",
      "full name": "uint64_t vcltzd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcmlt",
      "function_en": "[scalar] fcmlt [64]",
      "function_cn": "[标量] 比较是否小于零寄存器 [64]"
    },
    {
      "name": "vcage_f32",
      "full name": "uint32x2_t vcage_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [32]",
      "function_cn": "[向量] 比较绝对值大于等于 [32]"
    },
    {
      "name": "vcageq_f32",
      "full name": "uint32x4_t vcageq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [32]",
      "function_cn": "[向量] 比较绝对值大于等于 [32]"
    },
    {
      "name": "vcage_f64",
      "full name": "uint64x1_t vcage_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [64]",
      "function_cn": "[向量] 比较绝对值大于等于 [64]"
    },
    {
      "name": "vcageq_f64",
      "full name": "uint64x2_t vcageq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [64]",
      "function_cn": "[向量] 比较绝对值大于等于 [64]"
    },
    {
      "name": "vcages_f32",
      "full name": "uint32_t vcages_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[scalar] facge [32]",
      "function_cn": "[标量] 比较绝对值大于等于 [32]"
    },
    {
      "name": "vcaged_f64",
      "full name": "uint64_t vcaged_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[scalar] facge [64]",
      "function_cn": "[标量] 比较绝对值大于等于 [64]"
    },
    {
      "name": "vcale_f32",
      "full name": "uint32x2_t vcale_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [32]",
      "function_cn": "[向量] 比较绝对值小于等于 [32]"
    },
    {
      "name": "vcaleq_f32",
      "full name": "uint32x4_t vcaleq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [32]",
      "function_cn": "[向量] 比较绝对值小于等于 [32]"
    },
    {
      "name": "vcale_f64",
      "full name": "uint64x1_t vcale_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [64]",
      "function_cn": "[向量] 比较绝对值小于等于 [64]"
    },
    {
      "name": "vcaleq_f64",
      "full name": "uint64x2_t vcaleq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[vector] facge [64]",
      "function_cn": "[向量] 比较绝对值小于等于 [64]"
    },
    {
      "name": "vcales_f32",
      "full name": "uint32_t vcales_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[scalar] facge [32]",
      "function_cn": "[标量] 比较绝对值小于等于 [32]"
    },
    {
      "name": "vcaled_f64",
      "full name": "uint64_t vcaled_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facge",
      "function_en": "[scalar] facge [64]",
      "function_cn": "[标量] 比较绝对值小于等于 [64]"
    },
    {
      "name": "vcagt_f32",
      "full name": "uint32x2_t vcagt_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [32]",
      "function_cn": "[向量] 比较绝对值大于 [32]"
    },
    {
      "name": "vcagtq_f32",
      "full name": "uint32x4_t vcagtq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [32]",
      "function_cn": "[向量] 比较绝对值大于 [32]"
    },
    {
      "name": "vcagt_f64",
      "full name": "uint64x1_t vcagt_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [64]",
      "function_cn": "[向量] 比较绝对值大于 [64]"
    },
    {
      "name": "vcagtq_f64",
      "full name": "uint64x2_t vcagtq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [64]",
      "function_cn": "[向量] 比较绝对值大于 [64]"
    },
    {
      "name": "vcagts_f32",
      "full name": "uint32_t vcagts_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[scalar] facgt [32]",
      "function_cn": "[标量] 比较绝对值大于 [32]"
    },
    {
      "name": "vcagtd_f64",
      "full name": "uint64_t vcagtd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[scalar] facgt [32]",
      "function_cn": "[标量] 比较绝对值大于 [32]"
    },
    {
      "name": "vcalt_f32",
      "full name": "uint32x2_t vcalt_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [32]",
      "function_cn": "[向量] 比较绝对值小于 [32]"
    },
    {
      "name": "vcaltq_f32",
      "full name": "uint32x4_t vcaltq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [32]",
      "function_cn": "[向量] 比较绝对值小于 [32]"
    },
    {
      "name": "vcalt_f64",
      "full name": "uint64x1_t vcalt_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [64]",
      "function_cn": "[向量] 比较绝对值小于 [64]"
    },
    {
      "name": "vcaltq_f64",
      "full name": "uint64x2_t vcaltq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[vector] facgt [64]",
      "function_cn": "[向量] 比较绝对值小于 [64]"
    },
    {
      "name": "vcalts_f32",
      "full name": "uint32_t vcalts_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[scalar] facgt [32]",
      "function_cn": "[标量] 比较绝对值小于 [32]"
    },
    {
      "name": "vcaltd_f64",
      "full name": "uint64_t vcaltd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "facgt",
      "function_en": "[scalar] facgt [64]",
      "function_cn": "[标量] 比较绝对值小于 [64]"
    },
    {
      "name": "vtst_s8",
      "full name": "uint8x8_t vtst_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtstq_s8",
      "full name": "uint8x16_t vtstq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtst_s16",
      "full name": "uint16x4_t vtst_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [16]",
      "function_cn": "[向量] 按位比较测试位非零 [16]"
    },
    {
      "name": "vtstq_s16",
      "full name": "uint16x8_t vtstq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [16]",
      "function_cn": "[向量] 按位比较测试位非零 [16]"
    },
    {
      "name": "vtst_s32",
      "full name": "uint32x2_t vtst_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [32]",
      "function_cn": "[向量] 按位比较测试位非零 [32]"
    },
    {
      "name": "vtstq_s32",
      "full name": "uint32x4_t vtstq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [32]",
      "function_cn": "[向量] 按位比较测试位非零 [32]"
    },
    {
      "name": "vtst_u8",
      "full name": "uint8x8_t vtst_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtstq_u8",
      "full name": "uint8x16_t vtstq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtst_u16",
      "full name": "uint16x4_t vtst_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [16]",
      "function_cn": "[向量] 按位比较测试位非零 [16]"
    },
    {
      "name": "vtstq_u16",
      "full name": "uint16x8_t vtstq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [16]",
      "function_cn": "[向量] 按位比较测试位非零 [16]"
    },
    {
      "name": "vtst_u32",
      "full name": "uint32x2_t vtst_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [32]",
      "function_cn": "[向量] 按位比较测试位非零 [32]"
    },
    {
      "name": "vtstq_u32",
      "full name": "uint32x4_t vtstq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [32]",
      "function_cn": "[向量] 按位比较测试位非零 [32]"
    },
    {
      "name": "vtst_p8",
      "full name": "uint8x8_t vtst_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtstq_p8",
      "full name": "uint8x16_t vtstq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [8]",
      "function_cn": "[向量] 按位比较测试位非零 [8]"
    },
    {
      "name": "vtst_s64",
      "full name": "uint64x1_t vtst_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtstq_s64",
      "full name": "uint64x2_t vtstq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtst_u64",
      "full name": "uint64x1_t vtst_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtstq_u64",
      "full name": "uint64x2_t vtstq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtst_p64",
      "full name": "uint64x1_t vtst_p64(poly64x1_t a, poly64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtstq_p64",
      "full name": "uint64x2_t vtstq_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[vector] cmtst [64]",
      "function_cn": "[向量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtstd_s64",
      "full name": "uint64_t vtstd_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[scalar] cmtst [64]",
      "function_cn": "[标量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vtstd_u64",
      "full name": "uint64_t vtstd_u64(uint64_t a, uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cmtst",
      "function_en": "[scalar] cmtst [64]",
      "function_cn": "[标量] 按位比较测试位非零 [64]"
    },
    {
      "name": "vabd_s8",
      "full name": "int8x8_t vabd_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [8]",
      "function_cn": "[向量] 绝对差值 [8]"
    },
    {
      "name": "vabdq_s8",
      "full name": "int8x16_t vabdq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [8]",
      "function_cn": "[向量] 绝对差值 [8]"
    },
    {
      "name": "vabd_s16",
      "full name": "int16x4_t vabd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [16]",
      "function_cn": "[向量] 绝对差值 [16]"
    },
    {
      "name": "vabdq_s16",
      "full name": "int16x8_t vabdq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [16]",
      "function_cn": "[向量] 绝对差值 [16]"
    },
    {
      "name": "vabd_s32",
      "full name": "int32x2_t vabd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabdq_s32",
      "full name": "int32x4_t vabdq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabd",
      "function_en": "[vector] sabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabd_u8",
      "full name": "uint8x8_t vabd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [8]",
      "function_cn": "[向量] 绝对差值 [8]"
    },
    {
      "name": "vabdq_u8",
      "full name": "uint8x16_t vabdq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [8]",
      "function_cn": "[向量] 绝对差值 [8]"
    },
    {
      "name": "vabd_u16",
      "full name": "uint16x4_t vabd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [16]",
      "function_cn": "[向量] 绝对差值 [16]"
    },
    {
      "name": "vabdq_u16",
      "full name": "uint16x8_t vabdq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [16]",
      "function_cn": "[向量] 绝对差值 [16]"
    },
    {
      "name": "vabd_u32",
      "full name": "uint32x2_t vabd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabdq_u32",
      "full name": "uint32x4_t vabdq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabd",
      "function_en": "[vector] uabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabd_f32",
      "full name": "float32x2_t vabd_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[vector] fabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabdq_f32",
      "full name": "float32x4_t vabdq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[vector] fabd [32]",
      "function_cn": "[向量] 绝对差值 [32]"
    },
    {
      "name": "vabd_f64",
      "full name": "float64x1_t vabd_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[vector] fabd [64]",
      "function_cn": "[向量] 绝对差值 [64]"
    },
    {
      "name": "vabdq_f64",
      "full name": "float64x2_t vabdq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[vector] fabd [64]",
      "function_cn": "[向量] 绝对差值 [64]"
    },
    {
      "name": "vabds_f32",
      "full name": "float32_t vabds_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[scalar] fabd [32]",
      "function_cn": "[标量] 绝对差值 [32]"
    },
    {
      "name": "vabdd_f64",
      "full name": "float64_t vabdd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabd",
      "function_en": "[scalar] fabd [64]",
      "function_cn": "[标量] 绝对差值 [64]"
    },
    {
      "name": "vabdl_s8",
      "full name": "int16x8_t vabdl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl",
      "function_en": "[vector] sabdl [8]",
      "function_cn": "[向量] 长型绝对差值 [8]"
    },
    {
      "name": "vabdl_s16",
      "full name": "int32x4_t vabdl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl",
      "function_en": "[vector] sabdl [16]",
      "function_cn": "[向量] 长型绝对差值 [16]"
    },
    {
      "name": "vabdl_s32",
      "full name": "int64x2_t vabdl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl",
      "function_en": "[vector] sabdl [32]",
      "function_cn": "[向量] 长型绝对差值 [32]"
    },
    {
      "name": "vabdl_u8",
      "full name": "uint16x8_t vabdl_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl",
      "function_en": "[vector] uabdl [8]",
      "function_cn": "[向量] 长型绝对差值 [8]"
    },
    {
      "name": "vabdl_u16",
      "full name": "uint32x4_t vabdl_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl",
      "function_en": "[vector] uabdl [16]",
      "function_cn": "[向量] 长型绝对差值 [16]"
    },
    {
      "name": "vabdl_u32",
      "full name": "uint64x2_t vabdl_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl",
      "function_en": "[vector] uabdl [32]",
      "function_cn": "[向量] 长型绝对差值 [32]"
    },
    {
      "name": "vabdl_high_s8",
      "full name": "int16x8_t vabdl_high_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl2",
      "function_en": "[vector] sabdl2 [8]",
      "function_cn": "[向量] 长型绝对差值 [8]"
    },
    {
      "name": "vabdl_high_s16",
      "full name": "int32x4_t vabdl_high_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl2",
      "function_en": "[vector] sabdl2 [16]",
      "function_cn": "[向量] 长型绝对差值 [16]"
    },
    {
      "name": "vabdl_high_s32",
      "full name": "int64x2_t vabdl_high_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabdl2",
      "function_en": "[vector] sabdl2 [32]",
      "function_cn": "[向量] 长型绝对差值 [32]"
    },
    {
      "name": "vabdl_high_u8",
      "full name": "uint16x8_t vabdl_high_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl2",
      "function_en": "[vector] uabdl2 [8]",
      "function_cn": "[向量] 长型绝对差值 [8]"
    },
    {
      "name": "vabdl_high_u16",
      "full name": "uint32x4_t vabdl_high_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl2",
      "function_en": "[vector] uabdl2 [16]",
      "function_cn": "[向量] 长型绝对差值 [16]"
    },
    {
      "name": "vabdl_high_u32",
      "full name": "uint64x2_t vabdl_high_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabdl2",
      "function_en": "[vector] uabdl2 [32]",
      "function_cn": "[向量] 长型绝对差值 [32]"
    },
    {
      "name": "vaba_s8",
      "full name": "int8x8_t vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [8]",
      "function_cn": "[向量] 绝对差值再相加 [8]"
    },
    {
      "name": "vabaq_s8",
      "full name": "int8x16_t vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [8]",
      "function_cn": "[向量] 绝对差值再相加 [8]"
    },
    {
      "name": "vaba_s16",
      "full name": "int16x4_t vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [16]",
      "function_cn": "[向量] 绝对差值再相加 [16]"
    },
    {
      "name": "vabaq_s16",
      "full name": "int16x8_t vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [16]",
      "function_cn": "[向量] 绝对差值再相加 [16]"
    },
    {
      "name": "vaba_s32",
      "full name": "int32x2_t vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [32]",
      "function_cn": "[向量] 绝对差值再相加 [32]"
    },
    {
      "name": "vabaq_s32",
      "full name": "int32x4_t vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saba",
      "function_en": "[vector] saba [32]",
      "function_cn": "[向量] 绝对差值再相加 [32]"
    },
    {
      "name": "vaba_u8",
      "full name": "uint8x8_t vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [8]",
      "function_cn": "[向量] 绝对差值再相加 [8]"
    },
    {
      "name": "vabaq_u8",
      "full name": "uint8x16_t vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [8]",
      "function_cn": "[向量] 绝对差值再相加 [8]"
    },
    {
      "name": "vaba_u16",
      "full name": "uint16x4_t vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [16]",
      "function_cn": "[向量] 绝对差值再相加 [16]"
    },
    {
      "name": "vabaq_u16",
      "full name": "uint16x8_t vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [16]",
      "function_cn": "[向量] 绝对差值再相加 [16]"
    },
    {
      "name": "vaba_u32",
      "full name": "uint32x2_t vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [32]",
      "function_cn": "[向量] 绝对差值再相加 [32]"
    },
    {
      "name": "vabaq_u32",
      "full name": "uint32x4_t vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaba",
      "function_en": "[vector] uaba [32]",
      "function_cn": "[向量] 绝对差值再相加 [32]"
    },
    {
      "name": "vabal_s8",
      "full name": "int16x8_t vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal",
      "function_en": "[vector] sabal [8]",
      "function_cn": "[向量] 长型绝对差值再相加 [8]"
    },
    {
      "name": "vabal_s16",
      "full name": "int32x4_t vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal",
      "function_en": "[vector] sabal [16]",
      "function_cn": "[向量] 长型绝对差值再相加 [16]"
    },
    {
      "name": "vabal_s32",
      "full name": "int64x2_t vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal",
      "function_en": "[vector] sabal [32]",
      "function_cn": "[向量] 长型绝对差值再相加 [32]"
    },
    {
      "name": "vabal_u8",
      "full name": "uint16x8_t vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal",
      "function_en": "[vector] uabal [8]",
      "function_cn": "[向量] 长型绝对差值再相加 [8]"
    },
    {
      "name": "vabal_u16",
      "full name": "uint32x4_t vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal",
      "function_en": "[vector] uabal [16]",
      "function_cn": "[向量] 长型绝对差值再相加 [16]"
    },
    {
      "name": "vabal_u32",
      "full name": "uint64x2_t vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal",
      "function_en": "[vector] uabal [32]",
      "function_cn": "[向量] 长型绝对差值再相加 [32]"
    },
    {
      "name": "vabal_high_s8",
      "full name": "int16x8_t vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal2",
      "function_en": "[vector] sabal2 [8]",
      "function_cn": "[向量] 长型绝对差值再相加 [8]"
    },
    {
      "name": "vabal_high_s16",
      "full name": "int32x4_t vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal2",
      "function_en": "[vector] sabal2 [16]",
      "function_cn": "[向量] 长型绝对差值再相加 [16]"
    },
    {
      "name": "vabal_high_s32",
      "full name": "int64x2_t vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sabal2",
      "function_en": "[vector] sabal2 [32]",
      "function_cn": "[向量] 长型绝对差值再相加 [32]"
    },
    {
      "name": "vabal_high_u8",
      "full name": "uint16x8_t vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal2",
      "function_en": "[vector] uabal2 [8]",
      "function_cn": "[向量] 长型绝对差值再相加 [8]"
    },
    {
      "name": "vabal_high_u16",
      "full name": "uint32x4_t vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal2",
      "function_en": "[vector] uabal2 [16]",
      "function_cn": "[向量] 长型绝对差值再相加 [16]"
    },
    {
      "name": "vabal_high_u32",
      "full name": "uint64x2_t vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uabal2",
      "function_en": "[vector] uabal2 [32]",
      "function_cn": "[向量] 长型绝对差值再相加 [32]"
    },
    {
      "name": "vmax_s8",
      "full name": "int8x8_t vmax_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [8]",
      "function_cn": "[向量] 求最大值 [8]"
    },
    {
      "name": "vmaxq_s8",
      "full name": "int8x16_t vmaxq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_max_epi8",
      "Intel Asm": "pmaxsb",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [8]",
      "function_cn": "[向量] 求最大值 [8]"
    },
    {
      "name": "vmax_s16",
      "full name": "int16x4_t vmax_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_max_pi16",
      "Intel Asm": "pmaxsw",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [16]",
      "function_cn": "[向量] 求最大值 [16]"
    },
    {
      "name": "vmaxq_s16",
      "full name": "int16x8_t vmaxq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_max_epi16",
      "Intel Asm": "pmaxsw",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [16]",
      "function_cn": "[向量] 求最大值 [16]"
    },
    {
      "name": "vmax_s32",
      "full name": "int32x2_t vmax_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmaxq_s32",
      "full name": "int32x4_t vmaxq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_max_epi32",
      "Intel Asm": "pmaxsd",
      "Arm Asm": "smax",
      "function_en": "[vector] smax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmax_u8",
      "full name": "uint8x8_t vmax_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "_mm_max_pu8",
      "Intel Asm": "pmaxub",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [8]",
      "function_cn": "[向量] 求最大值 [8]"
    },
    {
      "name": "vmaxq_u8",
      "full name": "uint8x16_t vmaxq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "_mm_max_epu8",
      "Intel Asm": "pmaxub",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [8]",
      "function_cn": "[向量] 求最大值 [8]"
    },
    {
      "name": "vmax_u16",
      "full name": "uint16x4_t vmax_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [16]",
      "function_cn": "[向量] 求最大值 [16]"
    },
    {
      "name": "vmaxq_u16",
      "full name": "uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "_mm_max_epu16",
      "Intel Asm": "pmaxuw",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [16]",
      "function_cn": "[向量] 求最大值 [16]"
    },
    {
      "name": "vmax_u32",
      "full name": "uint32x2_t vmax_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmaxq_u32",
      "full name": "uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "_mm_max_epu32",
      "Intel Asm": "pmaxud",
      "Arm Asm": "umax",
      "function_en": "[vector] umax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmax_f32",
      "full name": "float32x2_t vmax_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmax",
      "function_en": "[vector] fmax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmaxq_f32",
      "full name": "float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_max_ps",
      "Intel Asm": "maxps",
      "Arm Asm": "fmax",
      "function_en": "[vector] fmax [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmax_f64",
      "full name": "float64x1_t vmax_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmax",
      "function_en": "[vector] fmax [64]",
      "function_cn": "[向量] 求最大值 [64]"
    },
    {
      "name": "vmaxq_f64",
      "full name": "float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_max_pd",
      "Intel Asm": "maxpd",
      "Arm Asm": "fmax",
      "function_en": "[vector] fmax [64]",
      "function_cn": "[向量] 求最大值 [64]"
    },
    {
      "name": "vmin_s8",
      "full name": "int8x8_t vmin_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [8]",
      "function_cn": "[向量] 求最小值 [8]"
    },
    {
      "name": "vminq_s8",
      "full name": "int8x16_t vminq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_min_epi8",
      "Intel Asm": "pminsb",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [8]",
      "function_cn": "[向量] 求最小值 [8]"
    },
    {
      "name": "vmin_s16",
      "full name": "int16x4_t vmin_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_min_pi16",
      "Intel Asm": "pminsw",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [16]",
      "function_cn": "[向量] 求最小值 [16]"
    },
    {
      "name": "vminq_s16",
      "full name": "int16x8_t vminq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_min_epi16",
      "Intel Asm": "pminsw",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [16]",
      "function_cn": "[向量] 求最小值 [16]"
    },
    {
      "name": "vmin_s32",
      "full name": "int32x2_t vmin_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vminq_s32",
      "full name": "int32x4_t vminq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_min_epi32",
      "Intel Asm": "pminsd",
      "Arm Asm": "smin",
      "function_en": "[vector] smin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vmin_u8",
      "full name": "uint8x8_t vmin_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "_mm_min_pu8",
      "Intel Asm": "pminub",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [8]",
      "function_cn": "[向量] 求最小值 [8]"
    },
    {
      "name": "vminq_u8",
      "full name": "uint8x16_t vminq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "_mm_min_epu8",
      "Intel Asm": "pminub",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [8]",
      "function_cn": "[向量] 求最小值 [8]"
    },
    {
      "name": "vmin_u16",
      "full name": "uint16x4_t vmin_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [16]",
      "function_cn": "[向量] 求最小值 [16]"
    },
    {
      "name": "vminq_u16",
      "full name": "uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "_mm_min_epu16",
      "Intel Asm": "pminuw",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [16]",
      "function_cn": "[向量] 求最小值 [16]"
    },
    {
      "name": "vmin_u32",
      "full name": "uint32x2_t vmin_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vminq_u32",
      "full name": "uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "_mm_min_epu32",
      "Intel Asm": "pminud",
      "Arm Asm": "umin",
      "function_en": "[vector] umin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vmin_f32",
      "full name": "float32x2_t vmin_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmin",
      "function_en": "[vector] fmin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vminq_f32",
      "full name": "float32x4_t vminq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_min_ps",
      "Intel Asm": "minps",
      "Arm Asm": "fmin",
      "function_en": "[vector] fmin [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vmin_f64",
      "full name": "float64x1_t vmin_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmin",
      "function_en": "[vector] fmin [64]",
      "function_cn": "[向量] 求最小值 [64]"
    },
    {
      "name": "vminq_f64",
      "full name": "float64x2_t vminq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_min_pd",
      "Intel Asm": "minpd",
      "Arm Asm": "fmin",
      "function_en": "[vector] fmin [64]",
      "function_cn": "[向量] 求最小值 [64]"
    },
    {
      "name": "vmaxnm_f32",
      "full name": "float32x2_t vmaxnm_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnm",
      "function_en": "[vector] fmaxnm [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmaxnmq_f32",
      "full name": "float32x4_t vmaxnmq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_max_ps",
      "Intel Asm": "maxps",
      "Arm Asm": "fmaxnm",
      "function_en": "[vector] fmaxnm [32]",
      "function_cn": "[向量] 求最大值 [32]"
    },
    {
      "name": "vmaxnm_f64",
      "full name": "float64x1_t vmaxnm_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnm",
      "function_en": "[vector] fmaxnm [64]",
      "function_cn": "[向量] 求最大值 [64]"
    },
    {
      "name": "vmaxnmq_f64",
      "full name": "float64x2_t vmaxnmq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_max_pd",
      "Intel Asm": "maxpd",
      "Arm Asm": "fmaxnm",
      "function_en": "[vector] fmaxnm [64]",
      "function_cn": "[向量] 求最大值 [64]"
    },
    {
      "name": "vminnm_f32",
      "full name": "float32x2_t vminnm_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnm",
      "function_en": "[vector] fminnm [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vminnmq_f32",
      "full name": "float32x4_t vminnmq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_min_ps",
      "Intel Asm": "minps",
      "Arm Asm": "fminnm",
      "function_en": "[vector] fminnm [32]",
      "function_cn": "[向量] 求最小值 [32]"
    },
    {
      "name": "vminnm_f64",
      "full name": "float64x1_t vminnm_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnm",
      "function_en": "[vector] fminnm [64]",
      "function_cn": "[向量] 求最小值 [64]"
    },
    {
      "name": "vminnmq_f64",
      "full name": "float64x2_t vminnmq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_min_pd",
      "Intel Asm": "minpd",
      "Arm Asm": "fminnm",
      "function_en": "[vector] fminnm [64]",
      "function_cn": "[向量] 求最小值 [64]"
    },
    {
      "name": "vshl_s8",
      "full name": "int8x8_t vshl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshlq_s8",
      "full name": "int8x16_t vshlq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshl_s16",
      "full name": "int16x4_t vshl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshlq_s16",
      "full name": "int16x8_t vshlq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshl_s32",
      "full name": "int32x2_t vshl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshlq_s32",
      "full name": "int32x4_t vshlq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshl_s64",
      "full name": "int64x1_t vshl_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshlq_s64",
      "full name": "int64x2_t vshlq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[vector] sshl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshl_u8",
      "full name": "uint8x8_t vshl_u8(uint8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshlq_u8",
      "full name": "uint8x16_t vshlq_u8(uint8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshl_u16",
      "full name": "uint16x4_t vshl_u16(uint16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshlq_u16",
      "full name": "uint16x8_t vshlq_u16(uint16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshl_u32",
      "full name": "uint32x2_t vshl_u32(uint32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshlq_u32",
      "full name": "uint32x4_t vshlq_u32(uint32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshl_u64",
      "full name": "uint64x1_t vshl_u64(uint64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshlq_u64",
      "full name": "uint64x2_t vshlq_u64(uint64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[vector] ushl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshld_s64",
      "full name": "int64_t vshld_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshl",
      "function_en": "[scalar] sshl [64]",
      "function_cn": "[标量] 左移 [64]"
    },
    {
      "name": "vshld_u64",
      "full name": "uint64_t vshld_u64(uint64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushl",
      "function_en": "[scalar] ushl [64]",
      "function_cn": "[标量] 左移 [64]"
    },
    {
      "name": "vqshl_s8",
      "full name": "int8x8_t vqshl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshlq_s8",
      "full name": "int8x16_t vqshlq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshl_s16",
      "full name": "int16x4_t vqshl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshlq_s16",
      "full name": "int16x8_t vqshlq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshl_s32",
      "full name": "int32x2_t vqshl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshlq_s32",
      "full name": "int32x4_t vqshlq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshl_s64",
      "full name": "int64x1_t vqshl_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlq_s64",
      "full name": "int64x2_t vqshlq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshl_u8",
      "full name": "uint8x8_t vqshl_u8(uint8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshlq_u8",
      "full name": "uint8x16_t vqshlq_u8(uint8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshl_u16",
      "full name": "uint16x4_t vqshl_u16(uint16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshlq_u16",
      "full name": "uint16x8_t vqshlq_u16(uint16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshl_u32",
      "full name": "uint32x2_t vqshl_u32(uint32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshlq_u32",
      "full name": "uint32x4_t vqshlq_u32(uint32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshl_u64",
      "full name": "uint64x1_t vqshl_u64(uint64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlq_u64",
      "full name": "uint64x2_t vqshlq_u64(uint64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlb_s8",
      "full name": "int8_t vqshlb_s8(int8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [8]",
      "function_cn": "[标量] 饱和左移 [8]"
    },
    {
      "name": "vqshlh_s16",
      "full name": "int16_t vqshlh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [16]",
      "function_cn": "[标量] 饱和左移 [16]"
    },
    {
      "name": "vqshls_s32",
      "full name": "int32_t vqshls_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [32]",
      "function_cn": "[标量] 饱和左移 [32]"
    },
    {
      "name": "vqshld_s64",
      "full name": "int64_t vqshld_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [64]",
      "function_cn": "[标量] 饱和左移 [64]"
    },
    {
      "name": "vqshlb_u8",
      "full name": "uint8_t vqshlb_u8(uint8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [8]",
      "function_cn": "[标量] 饱和左移 [8]"
    },
    {
      "name": "vqshlh_u16",
      "full name": "uint16_t vqshlh_u16(uint16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [16]",
      "function_cn": "[标量] 饱和左移 [16]"
    },
    {
      "name": "vqshls_u32",
      "full name": "uint32_t vqshls_u32(uint32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [32]",
      "function_cn": "[标量] 饱和左移 [32]"
    },
    {
      "name": "vqshld_u64",
      "full name": "uint64_t vqshld_u64(uint64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [64]",
      "function_cn": "[标量] 饱和左移 [64]"
    },
    {
      "name": "vrshl_s8",
      "full name": "int8x8_t vrshl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [8]",
      "function_cn": "[向量] 舍入左移 [8]"
    },
    {
      "name": "vrshlq_s8",
      "full name": "int8x16_t vrshlq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [8]",
      "function_cn": "[向量] 舍入左移 [8]"
    },
    {
      "name": "vrshl_s16",
      "full name": "int16x4_t vrshl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [16]",
      "function_cn": "[向量] 舍入左移 [16]"
    },
    {
      "name": "vrshlq_s16",
      "full name": "int16x8_t vrshlq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [16]",
      "function_cn": "[向量] 舍入左移 [16]"
    },
    {
      "name": "vrshl_s32",
      "full name": "int32x2_t vrshl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [32]",
      "function_cn": "[向量] 舍入左移 [32]"
    },
    {
      "name": "vrshlq_s32",
      "full name": "int32x4_t vrshlq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [32]",
      "function_cn": "[向量] 舍入左移 [32]"
    },
    {
      "name": "vrshl_s64",
      "full name": "int64x1_t vrshl_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [64]",
      "function_cn": "[向量] 舍入左移 [64]"
    },
    {
      "name": "vrshlq_s64",
      "full name": "int64x2_t vrshlq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[vector] srshl [64]",
      "function_cn": "[向量] 舍入左移 [64]"
    },
    {
      "name": "vrshl_u8",
      "full name": "uint8x8_t vrshl_u8(uint8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [8]",
      "function_cn": "[向量] 舍入左移 [8]"
    },
    {
      "name": "vrshlq_u8",
      "full name": "uint8x16_t vrshlq_u8(uint8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [8]",
      "function_cn": "[向量] 舍入左移 [8]"
    },
    {
      "name": "vrshl_u16",
      "full name": "uint16x4_t vrshl_u16(uint16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [16]",
      "function_cn": "[向量] 舍入左移 [16]"
    },
    {
      "name": "vrshlq_u16",
      "full name": "uint16x8_t vrshlq_u16(uint16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [16]",
      "function_cn": "[向量] 舍入左移 [16]"
    },
    {
      "name": "vrshl_u32",
      "full name": "uint32x2_t vrshl_u32(uint32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [32]",
      "function_cn": "[向量] 舍入左移 [32]"
    },
    {
      "name": "vrshlq_u32",
      "full name": "uint32x4_t vrshlq_u32(uint32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [32]",
      "function_cn": "[向量] 舍入左移 [32]"
    },
    {
      "name": "vrshl_u64",
      "full name": "uint64x1_t vrshl_u64(uint64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [64]",
      "function_cn": "[向量] 舍入左移 [64]"
    },
    {
      "name": "vrshlq_u64",
      "full name": "uint64x2_t vrshlq_u64(uint64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[vector] urshl [64]",
      "function_cn": "[向量] 舍入左移 [64]"
    },
    {
      "name": "vrshld_s64",
      "full name": "int64_t vrshld_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshl",
      "function_en": "[scalar] srshl [64]",
      "function_cn": "[标量] 舍入左移 [64]"
    },
    {
      "name": "vrshld_u64",
      "full name": "uint64_t vrshld_u64(uint64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshl",
      "function_en": "[scalar] urshl [64]",
      "function_cn": "[标量] 舍入左移 [64]"
    },
    {
      "name": "vqrshl_s8",
      "full name": "int8x8_t vqrshl_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [8]",
      "function_cn": "[向量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshlq_s8",
      "full name": "int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [8]",
      "function_cn": "[向量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshl_s16",
      "full name": "int16x4_t vqrshl_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [16]",
      "function_cn": "[向量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshlq_s16",
      "full name": "int16x8_t vqrshlq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [16]",
      "function_cn": "[向量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshl_s32",
      "full name": "int32x2_t vqrshl_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [32]",
      "function_cn": "[向量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshlq_s32",
      "full name": "int32x4_t vqrshlq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [32]",
      "function_cn": "[向量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshl_s64",
      "full name": "int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [64]",
      "function_cn": "[向量] 饱和舍入左移 [64]"
    },
    {
      "name": "vqrshlq_s64",
      "full name": "int64x2_t vqrshlq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[vector] sqrshl [64]",
      "function_cn": "[向量] 饱和舍入左移 [64]"
    },
    {
      "name": "vqrshl_u8",
      "full name": "uint8x8_t vqrshl_u8(uint8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [8]",
      "function_cn": "[向量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshlq_u8",
      "full name": "uint8x16_t vqrshlq_u8(uint8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [8]",
      "function_cn": "[向量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshl_u16",
      "full name": "uint16x4_t vqrshl_u16(uint16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [16]",
      "function_cn": "[向量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshlq_u16",
      "full name": "uint16x8_t vqrshlq_u16(uint16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [16]",
      "function_cn": "[向量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshl_u32",
      "full name": "uint32x2_t vqrshl_u32(uint32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [32]",
      "function_cn": "[向量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshlq_u32",
      "full name": "uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [32]",
      "function_cn": "[向量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshl_u64",
      "full name": "uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [64]",
      "function_cn": "[向量] 饱和舍入左移 [64]"
    },
    {
      "name": "vqrshlq_u64",
      "full name": "uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[vector] uqrshl [64]",
      "function_cn": "[向量] 饱和舍入左移 [64]"
    },
    {
      "name": "vqrshlb_s8",
      "full name": "int8_t vqrshlb_s8(int8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[scalar] sqrshl [8]",
      "function_cn": "[标量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshlh_s16",
      "full name": "int16_t vqrshlh_s16(int16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[scalar] sqrshl [16]",
      "function_cn": "[标量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshls_s32",
      "full name": "int32_t vqrshls_s32(int32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[scalar] sqrshl [32]",
      "function_cn": "[标量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshld_s64",
      "full name": "int64_t vqrshld_s64(int64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshl",
      "function_en": "[scalar] sqrshl [64]",
      "function_cn": "[标量] 饱和舍入左移 [64]"
    },
    {
      "name": "vqrshlb_u8",
      "full name": "uint8_t vqrshlb_u8(uint8_t a, int8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[scalar] uqrshl [8]",
      "function_cn": "[标量] 饱和舍入左移 [8]"
    },
    {
      "name": "vqrshlh_u16",
      "full name": "uint16_t vqrshlh_u16(uint16_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[scalar] uqrshl [16]",
      "function_cn": "[标量] 饱和舍入左移 [16]"
    },
    {
      "name": "vqrshls_u32",
      "full name": "uint32_t vqrshls_u32(uint32_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[scalar] uqrshl [32]",
      "function_cn": "[标量] 饱和舍入左移 [32]"
    },
    {
      "name": "vqrshld_u64",
      "full name": "uint64_t vqrshld_u64(uint64_t a, int64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshl",
      "function_en": "[scalar] uqrshl [64]",
      "function_cn": "[标量] 饱和舍入左移 [64]"
    },
    {
      "name": "vshr_n_s8",
      "full name": "int8x8_t vshr_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [8]",
      "function_cn": "[向量] 右移 [8]"
    },
    {
      "name": "vshrq_n_s8",
      "full name": "int8x16_t vshrq_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [8]",
      "function_cn": "[向量] 右移 [8]"
    },
    {
      "name": "vshr_n_s16",
      "full name": "int16x4_t vshr_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [16]",
      "function_cn": "[向量] 右移 [16]"
    },
    {
      "name": "vshrq_n_s16",
      "full name": "int16x8_t vshrq_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [16]",
      "function_cn": "[向量] 右移 [16]"
    },
    {
      "name": "vshr_n_s32",
      "full name": "int32x2_t vshr_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [32]",
      "function_cn": "[向量] 右移 [32]"
    },
    {
      "name": "vshrq_n_s32",
      "full name": "int32x4_t vshrq_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [32]",
      "function_cn": "[向量] 右移 [32]"
    },
    {
      "name": "vshr_n_s64",
      "full name": "int64x1_t vshr_n_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [64]",
      "function_cn": "[向量] 右移 [64]"
    },
    {
      "name": "vshrq_n_s64",
      "full name": "int64x2_t vshrq_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[vector] sshr [64]",
      "function_cn": "[向量] 右移 [64]"
    },
    {
      "name": "vshr_n_u8",
      "full name": "uint8x8_t vshr_n_u8(uint8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [8]",
      "function_cn": "[向量] 右移 [8]"
    },
    {
      "name": "vshrq_n_u8",
      "full name": "uint8x16_t vshrq_n_u8(uint8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [8]",
      "function_cn": "[向量] 右移 [8]"
    },
    {
      "name": "vshr_n_u16",
      "full name": "uint16x4_t vshr_n_u16(uint16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [16]",
      "function_cn": "[向量] 右移 [16]"
    },
    {
      "name": "vshrq_n_u16",
      "full name": "uint16x8_t vshrq_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [16]",
      "function_cn": "[向量] 右移 [16]"
    },
    {
      "name": "vshr_n_u32",
      "full name": "uint32x2_t vshr_n_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [32]",
      "function_cn": "[向量] 右移 [32]"
    },
    {
      "name": "vshrq_n_u32",
      "full name": "uint32x4_t vshrq_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [32]",
      "function_cn": "[向量] 右移 [32]"
    },
    {
      "name": "vshr_n_u64",
      "full name": "uint64x1_t vshr_n_u64(uint64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [64]",
      "function_cn": "[向量] 右移 [64]"
    },
    {
      "name": "vshrq_n_u64",
      "full name": "uint64x2_t vshrq_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[vector] ushr [64]",
      "function_cn": "[向量] 右移 [64]"
    },
    {
      "name": "vshrd_n_s64",
      "full name": "int64_t vshrd_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshr",
      "function_en": "[scalar] sshr [64]",
      "function_cn": "[标量] 右移 [64]"
    },
    {
      "name": "vshrd_n_u64",
      "full name": "uint64_t vshrd_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushr",
      "function_en": "[scalar] ushr [64]",
      "function_cn": "[标量] 右移 [64]"
    },
    {
      "name": "vshl_n_s8",
      "full name": "int8x8_t vshl_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshlq_n_s8",
      "full name": "int8x16_t vshlq_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshl_n_s16",
      "full name": "int16x4_t vshl_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshlq_n_s16",
      "full name": "int16x8_t vshlq_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshl_n_s32",
      "full name": "int32x2_t vshl_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshlq_n_s32",
      "full name": "int32x4_t vshlq_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshl_n_s64",
      "full name": "int64x1_t vshl_n_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshlq_n_s64",
      "full name": "int64x2_t vshlq_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshl_n_u8",
      "full name": "uint8x8_t vshl_n_u8(uint8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshlq_n_u8",
      "full name": "uint8x16_t vshlq_n_u8(uint8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [8]",
      "function_cn": "[向量] 左移 [8]"
    },
    {
      "name": "vshl_n_u16",
      "full name": "uint16x4_t vshl_n_u16(uint16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshlq_n_u16",
      "full name": "uint16x8_t vshlq_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [16]",
      "function_cn": "[向量] 左移 [16]"
    },
    {
      "name": "vshl_n_u32",
      "full name": "uint32x2_t vshl_n_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshlq_n_u32",
      "full name": "uint32x4_t vshlq_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [32]",
      "function_cn": "[向量] 左移 [32]"
    },
    {
      "name": "vshl_n_u64",
      "full name": "uint64x1_t vshl_n_u64(uint64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshlq_n_u64",
      "full name": "uint64x2_t vshlq_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[vector] shl [64]",
      "function_cn": "[向量] 左移 [64]"
    },
    {
      "name": "vshld_n_s64",
      "full name": "int64_t vshld_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[scalar] shl [64]",
      "function_cn": "[标量] 左移 [64]"
    },
    {
      "name": "vshld_n_u64",
      "full name": "uint64_t vshld_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shl",
      "function_en": "[scalar] shl [64]",
      "function_cn": "[标量] 左移 [64]"
    },
    {
      "name": "vrshr_n_s8",
      "full name": "int8x8_t vrshr_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [8]",
      "function_cn": "[向量] 舍入右移 [8]"
    },
    {
      "name": "vrshrq_n_s8",
      "full name": "int8x16_t vrshrq_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [8]",
      "function_cn": "[向量] 舍入右移 [8]"
    },
    {
      "name": "vrshr_n_s16",
      "full name": "int16x4_t vrshr_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [16]",
      "function_cn": "[向量] 舍入右移 [16]"
    },
    {
      "name": "vrshrq_n_s16",
      "full name": "int16x8_t vrshrq_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [16]",
      "function_cn": "[向量] 舍入右移 [16]"
    },
    {
      "name": "vrshr_n_s32",
      "full name": "int32x2_t vrshr_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [32]",
      "function_cn": "[向量] 舍入右移 [32]"
    },
    {
      "name": "vrshrq_n_s32",
      "full name": "int32x4_t vrshrq_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [32]",
      "function_cn": "[向量] 舍入右移 [32]"
    },
    {
      "name": "vrshr_n_s64",
      "full name": "int64x1_t vrshr_n_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [64]",
      "function_cn": "[向量] 舍入右移 [64]"
    },
    {
      "name": "vrshrq_n_s64",
      "full name": "int64x2_t vrshrq_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[vector] srshr [64]",
      "function_cn": "[向量] 舍入右移 [64]"
    },
    {
      "name": "vrshr_n_u8",
      "full name": "uint8x8_t vrshr_n_u8(uint8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [8]",
      "function_cn": "[向量] 舍入右移 [8]"
    },
    {
      "name": "vrshrq_n_u8",
      "full name": "uint8x16_t vrshrq_n_u8(uint8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [8]",
      "function_cn": "[向量] 舍入右移 [8]"
    },
    {
      "name": "vrshr_n_u16",
      "full name": "uint16x4_t vrshr_n_u16(uint16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [16]",
      "function_cn": "[向量] 舍入右移 [16]"
    },
    {
      "name": "vrshrq_n_u16",
      "full name": "uint16x8_t vrshrq_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [16]",
      "function_cn": "[向量] 舍入右移 [16]"
    },
    {
      "name": "vrshr_n_u32",
      "full name": "uint32x2_t vrshr_n_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [32]",
      "function_cn": "[向量] 舍入右移 [32]"
    },
    {
      "name": "vrshrq_n_u32",
      "full name": "uint32x4_t vrshrq_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [32]",
      "function_cn": "[向量] 舍入右移 [32]"
    },
    {
      "name": "vrshr_n_u64",
      "full name": "uint64x1_t vrshr_n_u64(uint64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [64]",
      "function_cn": "[向量] 舍入右移 [64]"
    },
    {
      "name": "vrshrq_n_u64",
      "full name": "uint64x2_t vrshrq_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[vector] urshr [64]",
      "function_cn": "[向量] 舍入右移 [64]"
    },
    {
      "name": "vrshrd_n_s64",
      "full name": "int64_t vrshrd_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srshr",
      "function_en": "[scalar] srshr [64]",
      "function_cn": "[标量] 舍入右移 [64]"
    },
    {
      "name": "vrshrd_n_u64",
      "full name": "uint64_t vrshrd_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urshr",
      "function_en": "[scalar] urshr [64]",
      "function_cn": "[标量] 舍入右移 [64]"
    },
    {
      "name": "vsra_n_s8",
      "full name": "int8x8_t vsra_n_s8(int8x8_t a, int8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [8]",
      "function_cn": "[向量] 右移相加 [8]"
    },
    {
      "name": "vsraq_n_s8",
      "full name": "int8x16_t vsraq_n_s8(int8x16_t a, int8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [8]",
      "function_cn": "[向量] 右移相加 [8]"
    },
    {
      "name": "vsra_n_s16",
      "full name": "int16x4_t vsra_n_s16(int16x4_t a, int16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [16]",
      "function_cn": "[向量] 右移相加 [16]"
    },
    {
      "name": "vsraq_n_s16",
      "full name": "int16x8_t vsraq_n_s16(int16x8_t a, int16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [16]",
      "function_cn": "[向量] 右移相加 [16]"
    },
    {
      "name": "vsra_n_s32",
      "full name": "int32x2_t vsra_n_s32(int32x2_t a, int32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [32]",
      "function_cn": "[向量] 右移相加 [32]"
    },
    {
      "name": "vsraq_n_s32",
      "full name": "int32x4_t vsraq_n_s32(int32x4_t a, int32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [32]",
      "function_cn": "[向量] 右移相加 [32]"
    },
    {
      "name": "vsra_n_s64",
      "full name": "int64x1_t vsra_n_s64(int64x1_t a, int64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [64]",
      "function_cn": "[向量] 右移相加 [64]"
    },
    {
      "name": "vsraq_n_s64",
      "full name": "int64x2_t vsraq_n_s64(int64x2_t a, int64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[vector] ssra [64]",
      "function_cn": "[向量] 右移相加 [64]"
    },
    {
      "name": "vsra_n_u8",
      "full name": "uint8x8_t vsra_n_u8(uint8x8_t a, uint8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [8]",
      "function_cn": "[向量] 右移相加 [8]"
    },
    {
      "name": "vsraq_n_u8",
      "full name": "uint8x16_t vsraq_n_u8(uint8x16_t a, uint8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [8]",
      "function_cn": "[向量] 右移相加 [8]"
    },
    {
      "name": "vsra_n_u16",
      "full name": "uint16x4_t vsra_n_u16(uint16x4_t a, uint16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [16]",
      "function_cn": "[向量] 右移相加 [16]"
    },
    {
      "name": "vsraq_n_u16",
      "full name": "uint16x8_t vsraq_n_u16(uint16x8_t a, uint16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [16]",
      "function_cn": "[向量] 右移相加 [16]"
    },
    {
      "name": "vsra_n_u32",
      "full name": "uint32x2_t vsra_n_u32(uint32x2_t a, uint32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [32]",
      "function_cn": "[向量] 右移相加 [32]"
    },
    {
      "name": "vsraq_n_u32",
      "full name": "uint32x4_t vsraq_n_u32(uint32x4_t a, uint32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [32]",
      "function_cn": "[向量] 右移相加 [32]"
    },
    {
      "name": "vsra_n_u64",
      "full name": "uint64x1_t vsra_n_u64(uint64x1_t a, uint64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [64]",
      "function_cn": "[向量] 右移相加 [64]"
    },
    {
      "name": "vsraq_n_u64",
      "full name": "uint64x2_t vsraq_n_u64(uint64x2_t a, uint64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[vector] usra [64]",
      "function_cn": "[向量] 右移相加 [64]"
    },
    {
      "name": "vsrad_n_s64",
      "full name": "int64_t vsrad_n_s64(int64_t a, int64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ssra",
      "function_en": "[scalar] ssra [64]",
      "function_cn": "[标量] 右移相加 [64]"
    },
    {
      "name": "vsrad_n_u64",
      "full name": "uint64_t vsrad_n_u64(uint64_t a, uint64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "usra",
      "function_en": "[scalar] usra [64]",
      "function_cn": "[标量] 右移相加 [64]"
    },
    {
      "name": "vrsra_n_s8",
      "full name": "int8x8_t vrsra_n_s8(int8x8_t a, int8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [8]",
      "function_cn": "[向量] 舍入右移相加 [8]"
    },
    {
      "name": "vrsraq_n_s8",
      "full name": "int8x16_t vrsraq_n_s8(int8x16_t a, int8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [8]",
      "function_cn": "[向量] 舍入右移相加 [8]"
    },
    {
      "name": "vrsra_n_s16",
      "full name": "int16x4_t vrsra_n_s16(int16x4_t a, int16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [16]",
      "function_cn": "[向量] 舍入右移相加 [16]"
    },
    {
      "name": "vrsraq_n_s16",
      "full name": "int16x8_t vrsraq_n_s16(int16x8_t a, int16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [16]",
      "function_cn": "[向量] 舍入右移相加 [16]"
    },
    {
      "name": "vrsra_n_s32",
      "full name": "int32x2_t vrsra_n_s32(int32x2_t a, int32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [32]",
      "function_cn": "[向量] 舍入右移相加 [32]"
    },
    {
      "name": "vrsraq_n_s32",
      "full name": "int32x4_t vrsraq_n_s32(int32x4_t a, int32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [32]",
      "function_cn": "[向量] 舍入右移相加 [32]"
    },
    {
      "name": "vrsra_n_s64",
      "full name": "int64x1_t vrsra_n_s64(int64x1_t a, int64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [64]",
      "function_cn": "[向量] 舍入右移相加 [64]"
    },
    {
      "name": "vrsraq_n_s64",
      "full name": "int64x2_t vrsraq_n_s64(int64x2_t a, int64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[vector] srsra [64]",
      "function_cn": "[向量] 舍入右移相加 [64]"
    },
    {
      "name": "vrsra_n_u8",
      "full name": "uint8x8_t vrsra_n_u8(uint8x8_t a, uint8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [8]",
      "function_cn": "[向量] 舍入右移相加 [8]"
    },
    {
      "name": "vrsraq_n_u8",
      "full name": "uint8x16_t vrsraq_n_u8(uint8x16_t a, uint8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [8]",
      "function_cn": "[向量] 舍入右移相加 [8]"
    },
    {
      "name": "vrsra_n_u16",
      "full name": "uint16x4_t vrsra_n_u16(uint16x4_t a, uint16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [16]",
      "function_cn": "[向量] 舍入右移相加 [16]"
    },
    {
      "name": "vrsraq_n_u16",
      "full name": "uint16x8_t vrsraq_n_u16(uint16x8_t a, uint16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [16]",
      "function_cn": "[向量] 舍入右移相加 [16]"
    },
    {
      "name": "vrsra_n_u32",
      "full name": "uint32x2_t vrsra_n_u32(uint32x2_t a, uint32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [32]",
      "function_cn": "[向量] 舍入右移相加 [32]"
    },
    {
      "name": "vrsraq_n_u32",
      "full name": "uint32x4_t vrsraq_n_u32(uint32x4_t a, uint32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [32]",
      "function_cn": "[向量] 舍入右移相加 [32]"
    },
    {
      "name": "vrsra_n_u64",
      "full name": "uint64x1_t vrsra_n_u64(uint64x1_t a, uint64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [64]",
      "function_cn": "[向量] 舍入右移相加 [64]"
    },
    {
      "name": "vrsraq_n_u64",
      "full name": "uint64x2_t vrsraq_n_u64(uint64x2_t a, uint64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[vector] ursra [64]",
      "function_cn": "[向量] 舍入右移相加 [64]"
    },
    {
      "name": "vrsrad_n_s64",
      "full name": "int64_t vrsrad_n_s64(int64_t a, int64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "srsra",
      "function_en": "[scalar] srsra [64]",
      "function_cn": "[标量] 舍入右移相加 [64]"
    },
    {
      "name": "vrsrad_n_u64",
      "full name": "uint64_t vrsrad_n_u64(uint64_t a, uint64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursra",
      "function_en": "[scalar] ursra [64]",
      "function_cn": "[标量] 舍入右移相加 [64]"
    },
    {
      "name": "vqshl_n_s8",
      "full name": "int8x8_t vqshl_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshlq_n_s8",
      "full name": "int8x16_t vqshlq_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshl_n_s16",
      "full name": "int16x4_t vqshl_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshlq_n_s16",
      "full name": "int16x8_t vqshlq_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshl_n_s32",
      "full name": "int32x2_t vqshl_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshlq_n_s32",
      "full name": "int32x4_t vqshlq_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshl_n_s64",
      "full name": "int64x1_t vqshl_n_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlq_n_s64",
      "full name": "int64x2_t vqshlq_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[vector] sqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshl_n_u8",
      "full name": "uint8x8_t vqshl_n_u8(uint8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshlq_n_u8",
      "full name": "uint8x16_t vqshlq_n_u8(uint8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [8]",
      "function_cn": "[向量] 饱和左移 [8]"
    },
    {
      "name": "vqshl_n_u16",
      "full name": "uint16x4_t vqshl_n_u16(uint16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshlq_n_u16",
      "full name": "uint16x8_t vqshlq_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [16]",
      "function_cn": "[向量] 饱和左移 [16]"
    },
    {
      "name": "vqshl_n_u32",
      "full name": "uint32x2_t vqshl_n_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshlq_n_u32",
      "full name": "uint32x4_t vqshlq_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [32]",
      "function_cn": "[向量] 饱和左移 [32]"
    },
    {
      "name": "vqshl_n_u64",
      "full name": "uint64x1_t vqshl_n_u64(uint64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlq_n_u64",
      "full name": "uint64x2_t vqshlq_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[vector] uqshl [64]",
      "function_cn": "[向量] 饱和左移 [64]"
    },
    {
      "name": "vqshlb_n_s8",
      "full name": "int8_t vqshlb_n_s8(int8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [8]",
      "function_cn": "[标量] 饱和左移 [8]"
    },
    {
      "name": "vqshlh_n_s16",
      "full name": "int16_t vqshlh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [16]",
      "function_cn": "[标量] 饱和左移 [16]"
    },
    {
      "name": "vqshls_n_s32",
      "full name": "int32_t vqshls_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [32]",
      "function_cn": "[标量] 饱和左移 [32]"
    },
    {
      "name": "vqshld_n_s64",
      "full name": "int64_t vqshld_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshl",
      "function_en": "[scalar] sqshl [64]",
      "function_cn": "[标量] 饱和左移 [64]"
    },
    {
      "name": "vqshlb_n_u8",
      "full name": "uint8_t vqshlb_n_u8(uint8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [8]",
      "function_cn": "[标量] 饱和左移 [8]"
    },
    {
      "name": "vqshlh_n_u16",
      "full name": "uint16_t vqshlh_n_u16(uint16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [16]",
      "function_cn": "[标量] 饱和左移 [16]"
    },
    {
      "name": "vqshls_n_u32",
      "full name": "uint32_t vqshls_n_u32(uint32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [32]",
      "function_cn": "[标量] 饱和左移 [32]"
    },
    {
      "name": "vqshld_n_u64",
      "full name": "uint64_t vqshld_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshl",
      "function_en": "[scalar] uqshl [64]",
      "function_cn": "[标量] 饱和左移 [64]"
    },
    {
      "name": "vqshlu_n_s8",
      "full name": "uint8x8_t vqshlu_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [8]",
      "function_cn": "[向量] 有符号饱和左移无符号 [8]"
    },
    {
      "name": "vqshluq_n_s8",
      "full name": "uint8x16_t vqshluq_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [8]",
      "function_cn": "[向量] 有符号饱和左移无符号 [8]"
    },
    {
      "name": "vqshlu_n_s16",
      "full name": "uint16x4_t vqshlu_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [16]",
      "function_cn": "[向量] 有符号饱和左移无符号 [16]"
    },
    {
      "name": "vqshluq_n_s16",
      "full name": "uint16x8_t vqshluq_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [16]",
      "function_cn": "[向量] 有符号饱和左移无符号 [16]"
    },
    {
      "name": "vqshlu_n_s32",
      "full name": "uint32x2_t vqshlu_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [32]",
      "function_cn": "[向量] 有符号饱和左移无符号 [32]"
    },
    {
      "name": "vqshluq_n_s32",
      "full name": "uint32x4_t vqshluq_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [32]",
      "function_cn": "[向量] 有符号饱和左移无符号 [32]"
    },
    {
      "name": "vqshlu_n_s64",
      "full name": "uint64x1_t vqshlu_n_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [64]",
      "function_cn": "[向量] 有符号饱和左移无符号 [64]"
    },
    {
      "name": "vqshluq_n_s64",
      "full name": "uint64x2_t vqshluq_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[vector] sqshlu [64]",
      "function_cn": "[向量] 有符号饱和左移无符号 [64]"
    },
    {
      "name": "vqshlub_n_s8",
      "full name": "uint8_t vqshlub_n_s8(int8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[scalar] sqshlu [8]",
      "function_cn": "[标量] 有符号饱和左移无符号 [8]"
    },
    {
      "name": "vqshluh_n_s16",
      "full name": "uint16_t vqshluh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[scalar] sqshlu [16]",
      "function_cn": "[标量] 有符号饱和左移无符号 [16]"
    },
    {
      "name": "vqshlus_n_s32",
      "full name": "uint32_t vqshlus_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[scalar] sqshlu [32]",
      "function_cn": "[标量] 有符号饱和左移无符号 [32]"
    },
    {
      "name": "vqshlud_n_s64",
      "full name": "uint64_t vqshlud_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshlu",
      "function_en": "[scalar] sqshlu [64]",
      "function_cn": "[标量] 有符号饱和左移无符号 [64]"
    },
    {
      "name": "vshrn_n_s16",
      "full name": "int8x8_t vshrn_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [16]",
      "function_cn": "[向量] 窄型右移 [16]"
    },
    {
      "name": "vshrn_n_s32",
      "full name": "int16x4_t vshrn_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [32]",
      "function_cn": "[向量] 窄型右移 [32]"
    },
    {
      "name": "vshrn_n_s64",
      "full name": "int32x2_t vshrn_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [64]",
      "function_cn": "[向量] 窄型右移 [64]"
    },
    {
      "name": "vshrn_n_u16",
      "full name": "uint8x8_t vshrn_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [16]",
      "function_cn": "[向量] 窄型右移 [16]"
    },
    {
      "name": "vshrn_n_u32",
      "full name": "uint16x4_t vshrn_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [32]",
      "function_cn": "[向量] 窄型右移 [32]"
    },
    {
      "name": "vshrn_n_u64",
      "full name": "uint32x2_t vshrn_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn",
      "function_en": "[vector] shrn [64]",
      "function_cn": "[向量] 窄型右移 [64]"
    },
    {
      "name": "vshrn_high_n_s16",
      "full name": "int8x16_t vshrn_high_n_s16(int8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [16]",
      "function_cn": "[向量] 窄型右移(立即数) [16]"
    },
    {
      "name": "vshrn_high_n_s32",
      "full name": "int16x8_t vshrn_high_n_s32(int16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [32]",
      "function_cn": "[向量] 窄型右移(立即数) [32]"
    },
    {
      "name": "vshrn_high_n_s64",
      "full name": "int32x4_t vshrn_high_n_s64(int32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [64]",
      "function_cn": "[向量] 窄型右移(立即数) [64]"
    },
    {
      "name": "vshrn_high_n_u16",
      "full name": "uint8x16_t vshrn_high_n_u16(uint8x8_t r, uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [16]",
      "function_cn": "[向量] 窄型右移(立即数) [16]"
    },
    {
      "name": "vshrn_high_n_u32",
      "full name": "uint16x8_t vshrn_high_n_u32(uint16x4_t r, uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [32]",
      "function_cn": "[向量] 窄型右移(立即数) [32]"
    },
    {
      "name": "vshrn_high_n_u64",
      "full name": "uint32x4_t vshrn_high_n_u64(uint32x2_t r, uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shrn2",
      "function_en": "[vector] shrn2 [64]",
      "function_cn": "[向量] 窄型右移(立即数) [64]"
    },
    {
      "name": "vqshrun_n_s16",
      "full name": "uint8x8_t vqshrun_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[vector] sqshrun [16]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号 [16]"
    },
    {
      "name": "vqshrun_n_s32",
      "full name": "uint16x4_t vqshrun_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[vector] sqshrun [32]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号 [32]"
    },
    {
      "name": "vqshrun_n_s64",
      "full name": "uint32x2_t vqshrun_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[vector] sqshrun [64]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号 [64]"
    },
    {
      "name": "vqshrunh_n_s16",
      "full name": "uint8_t vqshrunh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[scalar] sqshrun [16]",
      "function_cn": "[标量] 窄型有符号饱和右移为无符号 [16]"
    },
    {
      "name": "vqshruns_n_s32",
      "full name": "uint16_t vqshruns_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[scalar] sqshrun [32]",
      "function_cn": "[标量] 窄型有符号饱和右移为无符号 [32]"
    },
    {
      "name": "vqshrund_n_s64",
      "full name": "uint32_t vqshrund_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun",
      "function_en": "[scalar] sqshrun [64]",
      "function_cn": "[标量] 窄型有符号饱和右移为无符号 [64]"
    },
    {
      "name": "vqshrun_high_n_s16",
      "full name": "uint8x16_t vqshrun_high_n_s16(uint8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun2",
      "function_en": "[vector] sqshrun2 [16]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号(立即数) [16]"
    },
    {
      "name": "vqshrun_high_n_s32",
      "full name": "uint16x8_t vqshrun_high_n_s32(uint16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun2",
      "function_en": "[vector] sqshrun2 [32]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号(立即数) [32]"
    },
    {
      "name": "vqshrun_high_n_s64",
      "full name": "uint32x4_t vqshrun_high_n_s64(uint32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrun2",
      "function_en": "[vector] sqshrun2 [64]",
      "function_cn": "[向量] 窄型有符号饱和右移为无符号(立即数) [64]"
    },
    {
      "name": "vqrshrun_n_s16",
      "full name": "uint8x8_t vqrshrun_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[vector] sqrshrun [16]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号 [16]"
    },
    {
      "name": "vqrshrun_n_s32",
      "full name": "uint16x4_t vqrshrun_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[vector] sqrshrun [32]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号 [32]"
    },
    {
      "name": "vqrshrun_n_s64",
      "full name": "uint32x2_t vqrshrun_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[vector] sqrshrun [64]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号 [64]"
    },
    {
      "name": "vqrshrunh_n_s16",
      "full name": "uint8_t vqrshrunh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[scalar] sqrshrun [16]",
      "function_cn": "[标量] 窄型有符号饱和舍入右移为无符号 [16]"
    },
    {
      "name": "vqrshruns_n_s32",
      "full name": "uint16_t vqrshruns_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[scalar] sqrshrun [32]",
      "function_cn": "[标量] 窄型有符号饱和舍入右移为无符号 [32]"
    },
    {
      "name": "vqrshrund_n_s64",
      "full name": "uint32_t vqrshrund_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun",
      "function_en": "[scalar] sqrshrun [64]",
      "function_cn": "[标量] 窄型有符号饱和舍入右移为无符号 [64]"
    },
    {
      "name": "vqrshrun_high_n_s16",
      "full name": "uint8x16_t vqrshrun_high_n_s16(uint8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun2",
      "function_en": "[vector] sqrshrun2 [16]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号(立即数) [16]"
    },
    {
      "name": "vqrshrun_high_n_s32",
      "full name": "uint16x8_t vqrshrun_high_n_s32(uint16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun2",
      "function_en": "[vector] sqrshrun2 [32]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号(立即数) [32]"
    },
    {
      "name": "vqrshrun_high_n_s64",
      "full name": "uint32x4_t vqrshrun_high_n_s64(uint32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrun2",
      "function_en": "[vector] sqrshrun2 [64]",
      "function_cn": "[向量] 窄型有符号饱和舍入右移为无符号(立即数) [64]"
    },
    {
      "name": "vqshrn_n_s16",
      "full name": "int8x8_t vqshrn_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[vector] sqshrn [16]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrn_n_s32",
      "full name": "int16x4_t vqshrn_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[vector] sqshrn [32]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrn_n_s64",
      "full name": "int32x2_t vqshrn_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[vector] sqshrn [64]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vqshrn_n_u16",
      "full name": "uint8x8_t vqshrn_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[vector] uqshrn [16]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrn_n_u32",
      "full name": "uint16x4_t vqshrn_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[vector] uqshrn [32]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrn_n_u64",
      "full name": "uint32x2_t vqshrn_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[vector] uqshrn [64]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vqshrnh_n_s16",
      "full name": "int8_t vqshrnh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[scalar] sqshrn [16]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrns_n_s32",
      "full name": "int16_t vqshrns_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[scalar] sqshrn [32]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrnd_n_s64",
      "full name": "int32_t vqshrnd_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn",
      "function_en": "[scalar] sqshrn [64]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vqshrnh_n_u16",
      "full name": "uint8_t vqshrnh_n_u16(uint16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[scalar] uqshrn [16]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrns_n_u32",
      "full name": "uint16_t vqshrns_n_u32(uint32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[scalar] uqshrn [32]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrnd_n_u64",
      "full name": "uint32_t vqshrnd_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn",
      "function_en": "[scalar] uqshrn [64]",
      "function_cn": "[标量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vqshrn_high_n_s16",
      "full name": "int8x16_t vqshrn_high_n_s16(int8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn2",
      "function_en": "[vector] sqshrn2 [16]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrn_high_n_s32",
      "full name": "int16x8_t vqshrn_high_n_s32(int16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn2",
      "function_en": "[vector] sqshrn2 [32]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrn_high_n_s64",
      "full name": "int32x4_t vqshrn_high_n_s64(int32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqshrn2",
      "function_en": "[vector] sqshrn2 [64]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vqshrn_high_n_u16",
      "full name": "uint8x16_t vqshrn_high_n_u16(uint8x8_t r, uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn2",
      "function_en": "[vector] uqshrn2 [16]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [16]"
    },
    {
      "name": "vqshrn_high_n_u32",
      "full name": "uint16x8_t vqshrn_high_n_u32(uint16x4_t r, uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn2",
      "function_en": "[vector] uqshrn2 [32]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [32]"
    },
    {
      "name": "vqshrn_high_n_u64",
      "full name": "uint32x4_t vqshrn_high_n_u64(uint32x2_t r, uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqshrn2",
      "function_en": "[vector] uqshrn2 [64]",
      "function_cn": "[向量] 窄型饱和右移(立即数) [64]"
    },
    {
      "name": "vrshrn_n_s16",
      "full name": "int8x8_t vrshrn_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [16]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [16]"
    },
    {
      "name": "vrshrn_n_s32",
      "full name": "int16x4_t vrshrn_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [32]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [32]"
    },
    {
      "name": "vrshrn_n_s64",
      "full name": "int32x2_t vrshrn_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [64]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [64]"
    },
    {
      "name": "vrshrn_n_u16",
      "full name": "uint8x8_t vrshrn_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [16]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [16]"
    },
    {
      "name": "vrshrn_n_u32",
      "full name": "uint16x4_t vrshrn_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [32]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [32]"
    },
    {
      "name": "vrshrn_n_u64",
      "full name": "uint32x2_t vrshrn_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn",
      "function_en": "[vector] rshrn [64]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [64]"
    },
    {
      "name": "vrshrn_high_n_s16",
      "full name": "int8x16_t vrshrn_high_n_s16(int8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [16]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [16]"
    },
    {
      "name": "vrshrn_high_n_s32",
      "full name": "int16x8_t vrshrn_high_n_s32(int16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [32]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [32]"
    },
    {
      "name": "vrshrn_high_n_s64",
      "full name": "int32x4_t vrshrn_high_n_s64(int32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [64]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [64]"
    },
    {
      "name": "vrshrn_high_n_u16",
      "full name": "uint8x16_t vrshrn_high_n_u16(uint8x8_t r, uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [16]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [16]"
    },
    {
      "name": "vrshrn_high_n_u32",
      "full name": "uint16x8_t vrshrn_high_n_u32(uint16x4_t r, uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [32]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [32]"
    },
    {
      "name": "vrshrn_high_n_u64",
      "full name": "uint32x4_t vrshrn_high_n_u64(uint32x2_t r, uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rshrn2",
      "function_en": "[vector] rshrn2 [64]",
      "function_cn": "[向量] 窄型舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrn_n_s16",
      "full name": "int8x8_t vqrshrn_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[vector] sqrshrn [16]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrn_n_s32",
      "full name": "int16x4_t vqrshrn_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[vector] sqrshrn [32]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrn_n_s64",
      "full name": "int32x2_t vqrshrn_n_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[vector] sqrshrn [64]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrn_n_u16",
      "full name": "uint8x8_t vqrshrn_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[vector] uqrshrn [16]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrn_n_u32",
      "full name": "uint16x4_t vqrshrn_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[vector] uqrshrn [32]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrn_n_u64",
      "full name": "uint32x2_t vqrshrn_n_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[vector] uqrshrn [64]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrnh_n_s16",
      "full name": "int8_t vqrshrnh_n_s16(int16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[scalar] sqrshrn [16]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrns_n_s32",
      "full name": "int16_t vqrshrns_n_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[scalar] sqrshrn [32]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrnd_n_s64",
      "full name": "int32_t vqrshrnd_n_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn",
      "function_en": "[scalar] sqrshrn [64]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrnh_n_u16",
      "full name": "uint8_t vqrshrnh_n_u16(uint16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[scalar] uqrshrn [16]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrns_n_u32",
      "full name": "uint16_t vqrshrns_n_u32(uint32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[scalar] uqrshrn [32]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrnd_n_u64",
      "full name": "uint32_t vqrshrnd_n_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn",
      "function_en": "[scalar] uqrshrn [64]",
      "function_cn": "[标量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrn_high_n_s16",
      "full name": "int8x16_t vqrshrn_high_n_s16(int8x8_t r, int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn2",
      "function_en": "[vector] sqrshrn2 [16]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrn_high_n_s32",
      "full name": "int16x8_t vqrshrn_high_n_s32(int16x4_t r, int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn2",
      "function_en": "[vector] sqrshrn2 [32]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrn_high_n_s64",
      "full name": "int32x4_t vqrshrn_high_n_s64(int32x2_t r, int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrshrn2",
      "function_en": "[vector] sqrshrn2 [64]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vqrshrn_high_n_u16",
      "full name": "uint8x16_t vqrshrn_high_n_u16(uint8x8_t r, uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn2",
      "function_en": "[vector] uqrshrn2 [16]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [16]"
    },
    {
      "name": "vqrshrn_high_n_u32",
      "full name": "uint16x8_t vqrshrn_high_n_u32(uint16x4_t r, uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn2",
      "function_en": "[vector] uqrshrn2 [32]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [32]"
    },
    {
      "name": "vqrshrn_high_n_u64",
      "full name": "uint32x4_t vqrshrn_high_n_u64(uint32x2_t r, uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqrshrn2",
      "function_en": "[vector] uqrshrn2 [64]",
      "function_cn": "[向量] 窄型饱和舍入右移(立即数) [64]"
    },
    {
      "name": "vshll_n_s8",
      "full name": "int16x8_t vshll_n_s8(int8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [8]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 8) [8]"
    },
    {
      "name": "vshll_n_s16",
      "full name": "int32x4_t vshll_n_s16(int16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [16]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 16) [16]"
    },
    {
      "name": "vshll_n_s32",
      "full name": "int64x2_t vshll_n_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [32]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 32) [32]"
    },
    {
      "name": "vshll_n_u8",
      "full name": "uint16x8_t vshll_n_u8(uint8x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [8]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 8) [8]"
    },
    {
      "name": "vshll_n_u16",
      "full name": "uint32x4_t vshll_n_u16(uint16x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [16]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 16) [16]"
    },
    {
      "name": "vshll_n_u32",
      "full name": "uint64x2_t vshll_n_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll",
      "function_en": "[vector] shll [32]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 32) [32]"
    },
    {
      "name": "vshll_high_n_s8",
      "full name": "int16x8_t vshll_high_n_s8(int8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [8]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 8) [8]"
    },
    {
      "name": "vshll_high_n_s16",
      "full name": "int32x4_t vshll_high_n_s16(int16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [16]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 16) [16]"
    },
    {
      "name": "vshll_high_n_s32",
      "full name": "int64x2_t vshll_high_n_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [32]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 32) [32]"
    },
    {
      "name": "vshll_high_n_u8",
      "full name": "uint16x8_t vshll_high_n_u8(uint8x16_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [8]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 8) [8]"
    },
    {
      "name": "vshll_high_n_u16",
      "full name": "uint32x4_t vshll_high_n_u16(uint16x8_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [16]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 16) [16]"
    },
    {
      "name": "vshll_high_n_u32",
      "full name": "uint64x2_t vshll_high_n_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "shll2",
      "function_en": "[vector] shll2 [32]",
      "function_cn": "[向量] 长型左移(按元素个数, n = 32) [32]"
    },
    {
      "name": "vsri_n_s8",
      "full name": "int8x8_t vsri_n_s8(int8x8_t a, int8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsriq_n_s8",
      "full name": "int8x16_t vsriq_n_s8(int8x16_t a, int8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsri_n_s16",
      "full name": "int16x4_t vsri_n_s16(int16x4_t a, int16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsriq_n_s16",
      "full name": "int16x8_t vsriq_n_s16(int16x8_t a, int16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsri_n_s32",
      "full name": "int32x2_t vsri_n_s32(int32x2_t a, int32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [32]",
      "function_cn": "[向量] 右移并插入 [32]"
    },
    {
      "name": "vsriq_n_s32",
      "full name": "int32x4_t vsriq_n_s32(int32x4_t a, int32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [32]",
      "function_cn": "[向量] 右移并插入 [32]"
    },
    {
      "name": "vsri_n_s64",
      "full name": "int64x1_t vsri_n_s64(int64x1_t a, int64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsriq_n_s64",
      "full name": "int64x2_t vsriq_n_s64(int64x2_t a, int64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsri_n_u8",
      "full name": "uint8x8_t vsri_n_u8(uint8x8_t a, uint8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsriq_n_u8",
      "full name": "uint8x16_t vsriq_n_u8(uint8x16_t a, uint8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsri_n_u16",
      "full name": "uint16x4_t vsri_n_u16(uint16x4_t a, uint16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsriq_n_u16",
      "full name": "uint16x8_t vsriq_n_u16(uint16x8_t a, uint16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsri_n_u32",
      "full name": "uint32x2_t vsri_n_u32(uint32x2_t a, uint32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [32]",
      "function_cn": "[向量] 右移并插入 [32]"
    },
    {
      "name": "vsriq_n_u32",
      "full name": "uint32x4_t vsriq_n_u32(uint32x4_t a, uint32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [32]",
      "function_cn": "[向量] 右移并插入 [32]"
    },
    {
      "name": "vsri_n_u64",
      "full name": "uint64x1_t vsri_n_u64(uint64x1_t a, uint64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsriq_n_u64",
      "full name": "uint64x2_t vsriq_n_u64(uint64x2_t a, uint64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsri_n_p64",
      "full name": "poly64x1_t vsri_n_p64(poly64x1_t a, poly64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsriq_n_p64",
      "full name": "poly64x2_t vsriq_n_p64(poly64x2_t a, poly64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [64]",
      "function_cn": "[向量] 右移并插入 [64]"
    },
    {
      "name": "vsri_n_p8",
      "full name": "poly8x8_t vsri_n_p8(poly8x8_t a, poly8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsriq_n_p8",
      "full name": "poly8x16_t vsriq_n_p8(poly8x16_t a, poly8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [8]",
      "function_cn": "[向量] 右移并插入 [8]"
    },
    {
      "name": "vsri_n_p16",
      "full name": "poly16x4_t vsri_n_p16(poly16x4_t a, poly16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsriq_n_p16",
      "full name": "poly16x8_t vsriq_n_p16(poly16x8_t a, poly16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[vector] sri [16]",
      "function_cn": "[向量] 右移并插入 [16]"
    },
    {
      "name": "vsrid_n_s64",
      "full name": "int64_t vsrid_n_s64(int64_t a, int64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[scalar] sri [64]",
      "function_cn": "[标量] 右移并插入 [64]"
    },
    {
      "name": "vsrid_n_u64",
      "full name": "uint64_t vsrid_n_u64(uint64_t a, uint64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sri",
      "function_en": "[scalar] sri [64]",
      "function_cn": "[标量] 右移并插入 [64]"
    },
    {
      "name": "vsli_n_s8",
      "full name": "int8x8_t vsli_n_s8(int8x8_t a, int8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsliq_n_s8",
      "full name": "int8x16_t vsliq_n_s8(int8x16_t a, int8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsli_n_s16",
      "full name": "int16x4_t vsli_n_s16(int16x4_t a, int16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vsliq_n_s16",
      "full name": "int16x8_t vsliq_n_s16(int16x8_t a, int16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vsli_n_s32",
      "full name": "int32x2_t vsli_n_s32(int32x2_t a, int32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [32]",
      "function_cn": "[向量] 左移并插入 [32]"
    },
    {
      "name": "vsliq_n_s32",
      "full name": "int32x4_t vsliq_n_s32(int32x4_t a, int32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [32]",
      "function_cn": "[向量] 左移并插入 [32]"
    },
    {
      "name": "vsli_n_s64",
      "full name": "int64x1_t vsli_n_s64(int64x1_t a, int64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsliq_n_s64",
      "full name": "int64x2_t vsliq_n_s64(int64x2_t a, int64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsli_n_u8",
      "full name": "uint8x8_t vsli_n_u8(uint8x8_t a, uint8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsliq_n_u8",
      "full name": "uint8x16_t vsliq_n_u8(uint8x16_t a, uint8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsli_n_u16",
      "full name": "uint16x4_t vsli_n_u16(uint16x4_t a, uint16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vsliq_n_u16",
      "full name": "uint16x8_t vsliq_n_u16(uint16x8_t a, uint16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vsli_n_u32",
      "full name": "uint32x2_t vsli_n_u32(uint32x2_t a, uint32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [32]",
      "function_cn": "[向量] 左移并插入 [32]"
    },
    {
      "name": "vsliq_n_u32",
      "full name": "uint32x4_t vsliq_n_u32(uint32x4_t a, uint32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [32]",
      "function_cn": "[向量] 左移并插入 [32]"
    },
    {
      "name": "vsli_n_u64",
      "full name": "uint64x1_t vsli_n_u64(uint64x1_t a, uint64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsliq_n_u64",
      "full name": "uint64x2_t vsliq_n_u64(uint64x2_t a, uint64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsli_n_p64",
      "full name": "poly64x1_t vsli_n_p64(poly64x1_t a, poly64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsliq_n_p64",
      "full name": "poly64x2_t vsliq_n_p64(poly64x2_t a, poly64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [64]",
      "function_cn": "[向量] 左移并插入 [64]"
    },
    {
      "name": "vsli_n_p8",
      "full name": "poly8x8_t vsli_n_p8(poly8x8_t a, poly8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsliq_n_p8",
      "full name": "poly8x16_t vsliq_n_p8(poly8x16_t a, poly8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [8]",
      "function_cn": "[向量] 左移并插入 [8]"
    },
    {
      "name": "vsli_n_p16",
      "full name": "poly16x4_t vsli_n_p16(poly16x4_t a, poly16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vsliq_n_p16",
      "full name": "poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[vector] sli [16]",
      "function_cn": "[向量] 左移并插入 [16]"
    },
    {
      "name": "vslid_n_s64",
      "full name": "int64_t vslid_n_s64(int64_t a, int64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[scalar] sli [64]",
      "function_cn": "[标量] 左移并插入 [64]"
    },
    {
      "name": "vslid_n_u64",
      "full name": "uint64_t vslid_n_u64(uint64_t a, uint64_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sli",
      "function_en": "[scalar] sli [64]",
      "function_cn": "[标量] 左移并插入 [64]"
    },
    {
      "name": "vcvt_s32_f32",
      "full name": "int32x2_t vcvt_s32_f32(float32x2_t a)",
      "Intel name": "_mm_cvtt_ps2pi",
      "Intel Asm": "cvttps2pi",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtq_s32_f32",
      "full name": "int32x4_t vcvtq_s32_f32(float32x4_t a)",
      "Intel name": "_mm_cvttps_epi32",
      "Intel Asm": "cvttps2dq",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvt_u32_f32",
      "full name": "uint32x2_t vcvt_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtq_u32_f32",
      "full name": "uint32x4_t vcvtq_u32_f32(float32x4_t a)",
      "Intel name": "_mm_cvttps_epu32",
      "Intel Asm": "vcvttps2udq",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtn_s32_f32",
      "full name": "int32x2_t vcvtn_s32_f32(float32x2_t a)",
      "Intel name": "_mm_cvt_ps2pi",
      "Intel Asm": "cvtps2pi",
      "Arm Asm": "fcvtns",
      "function_en": "[vector] fcvtns [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtnq_s32_f32",
      "full name": "int32x4_t vcvtnq_s32_f32(float32x4_t a)",
      "Intel name": "_mm_cvtps_epi32",
      "Intel Asm": "cvtps2dq",
      "Arm Asm": "fcvtns",
      "function_en": "[vector] fcvtns [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtn_u32_f32",
      "full name": "uint32x2_t vcvtn_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtnu",
      "function_en": "[vector] fcvtnu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtnq_u32_f32",
      "full name": "uint32x4_t vcvtnq_u32_f32(float32x4_t a)",
      "Intel name": "_mm_cvtps_epu32",
      "Intel Asm": "vcvtps2udq",
      "Arm Asm": "fcvtnu",
      "function_en": "[vector] fcvtnu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtm_s32_f32",
      "full name": "int32x2_t vcvtm_s32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[vector] fcvtms [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtmq_s32_f32",
      "full name": "int32x4_t vcvtmq_s32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[vector] fcvtms [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtm_u32_f32",
      "full name": "uint32x2_t vcvtm_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[vector] fcvtmu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtmq_u32_f32",
      "full name": "uint32x4_t vcvtmq_u32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[vector] fcvtmu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtp_s32_f32",
      "full name": "int32x2_t vcvtp_s32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[vector] fcvtps [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvtpq_s32_f32",
      "full name": "int32x4_t vcvtpq_s32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[vector] fcvtps [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvtp_u32_f32",
      "full name": "uint32x2_t vcvtp_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[vector] fcvtpu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvtpq_u32_f32",
      "full name": "uint32x4_t vcvtpq_u32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[vector] fcvtpu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvta_s32_f32",
      "full name": "int32x2_t vcvta_s32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[vector] fcvtas [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvtaq_s32_f32",
      "full name": "int32x4_t vcvtaq_s32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[vector] fcvtas [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvta_u32_f32",
      "full name": "uint32x2_t vcvta_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[vector] fcvtau [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvtaq_u32_f32",
      "full name": "uint32x4_t vcvtaq_u32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[vector] fcvtau [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvts_s32_f32",
      "full name": "int32_t vcvts_s32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[scalar] fcvtzs [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvts_u32_f32",
      "full name": "uint32_t vcvts_u32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[scalar] fcvtzu [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtns_s32_f32",
      "full name": "int32_t vcvtns_s32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtns",
      "function_en": "[scalar] fcvtns [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtns_u32_f32",
      "full name": "uint32_t vcvtns_u32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtnu",
      "function_en": "[scalar] fcvtnu [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vcvtms_s32_f32",
      "full name": "int32_t vcvtms_s32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[scalar] fcvtms [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtms_u32_f32",
      "full name": "uint32_t vcvtms_u32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[scalar] fcvtmu [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，向负无穷舍入 [32]"
    },
    {
      "name": "vcvtps_s32_f32",
      "full name": "int32_t vcvtps_s32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[scalar] fcvtps [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvtps_u32_f32",
      "full name": "uint32_t vcvtps_u32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[scalar] fcvtpu [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，向正无穷舍入 [32]"
    },
    {
      "name": "vcvtas_s32_f32",
      "full name": "int32_t vcvtas_s32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[scalar] fcvtas [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvtas_u32_f32",
      "full name": "uint32_t vcvtas_u32_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[scalar] fcvtau [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vcvt_s64_f64",
      "full name": "int64x1_t vcvt_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtq_s64_f64",
      "full name": "int64x2_t vcvtq_s64_f64(float64x2_t a)",
      "Intel name": "_mm_cvttpd_epi64",
      "Intel Asm": "vcvttpd2qq",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvt_u64_f64",
      "full name": "uint64x1_t vcvt_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtq_u64_f64",
      "full name": "uint64x2_t vcvtq_u64_f64(float64x2_t a)",
      "Intel name": "_mm_cvttpd_epu64",
      "Intel Asm": "vcvttpd2uqq",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtn_s64_f64",
      "full name": "int64x1_t vcvtn_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtns",
      "function_en": "[vector] fcvtns [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtnq_s64_f64",
      "full name": "int64x2_t vcvtnq_s64_f64(float64x2_t a)",
      "Intel name": "_mm_cvtpd_epi64",
      "Intel Asm": "vcvtpd2qq",
      "Arm Asm": "fcvtns",
      "function_en": "[vector] fcvtns [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtn_u64_f64",
      "full name": "uint64x1_t vcvtn_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtnu",
      "function_en": "[vector] fcvtnu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtnq_u64_f64",
      "full name": "uint64x2_t vcvtnq_u64_f64(float64x2_t a)",
      "Intel name": "_mm_cvtpd_epu64",
      "Intel Asm": "vcvtpd2uqq",
      "Arm Asm": "fcvtnu",
      "function_en": "[vector] fcvtnu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtm_s64_f64",
      "full name": "int64x1_t vcvtm_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[vector] fcvtms [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtmq_s64_f64",
      "full name": "int64x2_t vcvtmq_s64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[vector] fcvtms [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtm_u64_f64",
      "full name": "uint64x1_t vcvtm_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[vector] fcvtmu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtmq_u64_f64",
      "full name": "uint64x2_t vcvtmq_u64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[vector] fcvtmu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtp_s64_f64",
      "full name": "int64x1_t vcvtp_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[vector] fcvtps [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvtpq_s64_f64",
      "full name": "int64x2_t vcvtpq_s64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[vector] fcvtps [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvtp_u64_f64",
      "full name": "uint64x1_t vcvtp_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[vector] fcvtpu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvtpq_u64_f64",
      "full name": "uint64x2_t vcvtpq_u64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[vector] fcvtpu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvta_s64_f64",
      "full name": "int64x1_t vcvta_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[vector] fcvtas [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvtaq_s64_f64",
      "full name": "int64x2_t vcvtaq_s64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[vector] fcvtas [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvta_u64_f64",
      "full name": "uint64x1_t vcvta_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[vector] fcvtau [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvtaq_u64_f64",
      "full name": "uint64x2_t vcvtaq_u64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[vector] fcvtau [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvtd_s64_f64",
      "full name": "int64_t vcvtd_s64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[scalar] fcvtzs [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtd_u64_f64",
      "full name": "uint64_t vcvtd_u64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[scalar] fcvtzu [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtnd_s64_f64",
      "full name": "int64_t vcvtnd_s64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtns",
      "function_en": "[scalar] fcvtns [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtnd_u64_f64",
      "full name": "uint64_t vcvtnd_u64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtnu",
      "function_en": "[scalar] fcvtnu [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vcvtmd_s64_f64",
      "full name": "int64_t vcvtmd_s64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtms",
      "function_en": "[scalar] fcvtms [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtmd_u64_f64",
      "full name": "uint64_t vcvtmd_u64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtmu",
      "function_en": "[scalar] fcvtmu [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，向负无穷舍入 [64]"
    },
    {
      "name": "vcvtpd_s64_f64",
      "full name": "int64_t vcvtpd_s64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtps",
      "function_en": "[scalar] fcvtps [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvtpd_u64_f64",
      "full name": "uint64_t vcvtpd_u64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtpu",
      "function_en": "[scalar] fcvtpu [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，向正无穷舍入 [64]"
    },
    {
      "name": "vcvtad_s64_f64",
      "full name": "int64_t vcvtad_s64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtas",
      "function_en": "[scalar] fcvtas [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvtad_u64_f64",
      "full name": "uint64_t vcvtad_u64_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtau",
      "function_en": "[scalar] fcvtau [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vcvt_n_s32_f32",
      "full name": "int32x2_t vcvt_n_s32_f32(float32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtq_n_s32_f32",
      "full name": "int32x4_t vcvtq_n_s32_f32(float32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [32]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvt_n_u32_f32",
      "full name": "uint32x2_t vcvt_n_u32_f32(float32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvtq_n_u32_f32",
      "full name": "uint32x4_t vcvtq_n_u32_f32(float32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [32]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvts_n_s32_f32",
      "full name": "int32_t vcvts_n_s32_f32(float32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[scalar] fcvtzs [32]",
      "function_cn": "[标量] 浮点数转为有符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvts_n_u32_f32",
      "full name": "uint32_t vcvts_n_u32_f32(float32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[scalar] fcvtzu [32]",
      "function_cn": "[标量] 浮点数转为无符号整数，向零舍入 [32]"
    },
    {
      "name": "vcvt_n_s64_f64",
      "full name": "int64x1_t vcvt_n_s64_f64(float64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtq_n_s64_f64",
      "full name": "int64x2_t vcvtq_n_s64_f64(float64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[vector] fcvtzs [64]",
      "function_cn": "[向量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvt_n_u64_f64",
      "full name": "uint64x1_t vcvt_n_u64_f64(float64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtq_n_u64_f64",
      "full name": "uint64x2_t vcvtq_n_u64_f64(float64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[vector] fcvtzu [64]",
      "function_cn": "[向量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtd_n_s64_f64",
      "full name": "int64_t vcvtd_n_s64_f64(float64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzs",
      "function_en": "[scalar] fcvtzs [64]",
      "function_cn": "[标量] 浮点数转为有符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvtd_n_u64_f64",
      "full name": "uint64_t vcvtd_n_u64_f64(float64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtzu",
      "function_en": "[scalar] fcvtzu [64]",
      "function_cn": "[标量] 浮点数转为无符号整数，向零舍入 [64]"
    },
    {
      "name": "vcvt_f32_s32",
      "full name": "float32x2_t vcvt_f32_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [32]",
      "function_cn": "[向量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvtq_f32_s32",
      "full name": "float32x4_t vcvtq_f32_s32(int32x4_t a)",
      "Intel name": "_mm_cvtepi32_ps",
      "Intel Asm": "cvtdq2ps",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [32]",
      "function_cn": "[向量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvt_f32_u32",
      "full name": "float32x2_t vcvt_f32_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [32]",
      "function_cn": "[向量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvtq_f32_u32",
      "full name": "float32x4_t vcvtq_f32_u32(uint32x4_t a)",
      "Intel name": "_mm_cvtepu32_ps",
      "Intel Asm": "vcvtudq2ps",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [32]",
      "function_cn": "[向量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvts_f32_s32",
      "full name": "float32_t vcvts_f32_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[scalar] scvtf [32]",
      "function_cn": "[标量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvts_f32_u32",
      "full name": "float32_t vcvts_f32_u32(uint32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[scalar] ucvtf [32]",
      "function_cn": "[标量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvt_f64_s64",
      "full name": "float64x1_t vcvt_f64_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [64]",
      "function_cn": "[向量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtq_f64_s64",
      "full name": "float64x2_t vcvtq_f64_s64(int64x2_t a)",
      "Intel name": "_mm_cvtepi64_pd",
      "Intel Asm": "vcvtqq2pd",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [64]",
      "function_cn": "[向量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvt_f64_u64",
      "full name": "float64x1_t vcvt_f64_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [64]",
      "function_cn": "[向量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtq_f64_u64",
      "full name": "float64x2_t vcvtq_f64_u64(uint64x2_t a)",
      "Intel name": "_mm_cvtepu64_pd",
      "Intel Asm": "vcvtuqq2pd",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [64]",
      "function_cn": "[向量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtd_f64_s64",
      "full name": "float64_t vcvtd_f64_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[scalar] scvtf [64]",
      "function_cn": "[标量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtd_f64_u64",
      "full name": "float64_t vcvtd_f64_u64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[scalar] ucvtf [64]",
      "function_cn": "[标量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvt_n_f32_s32",
      "full name": "float32x2_t vcvt_n_f32_s32(int32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [32]",
      "function_cn": "[向量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvtq_n_f32_s32",
      "full name": "float32x4_t vcvtq_n_f32_s32(int32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [32]",
      "function_cn": "[向量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvt_n_f32_u32",
      "full name": "float32x2_t vcvt_n_f32_u32(uint32x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [32]",
      "function_cn": "[向量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvtq_n_f32_u32",
      "full name": "float32x4_t vcvtq_n_f32_u32(uint32x4_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [32]",
      "function_cn": "[向量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvts_n_f32_s32",
      "full name": "float32_t vcvts_n_f32_s32(int32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[scalar] scvtf [32]",
      "function_cn": "[标量] 有符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvts_n_f32_u32",
      "full name": "float32_t vcvts_n_f32_u32(uint32_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[scalar] ucvtf [32]",
      "function_cn": "[标量] 无符号整数转为单精度浮点数 [32]"
    },
    {
      "name": "vcvt_n_f64_s64",
      "full name": "float64x1_t vcvt_n_f64_s64(int64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [64]",
      "function_cn": "[向量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtq_n_f64_s64",
      "full name": "float64x2_t vcvtq_n_f64_s64(int64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[vector] scvtf [64]",
      "function_cn": "[向量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvt_n_f64_u64",
      "full name": "float64x1_t vcvt_n_f64_u64(uint64x1_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [64]",
      "function_cn": "[向量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtq_n_f64_u64",
      "full name": "float64x2_t vcvtq_n_f64_u64(uint64x2_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[vector] ucvtf [64]",
      "function_cn": "[向量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtd_n_f64_s64",
      "full name": "float64_t vcvtd_n_f64_s64(int64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "scvtf",
      "function_en": "[scalar] scvtf [64]",
      "function_cn": "[标量] 有符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvtd_n_f64_u64",
      "full name": "float64_t vcvtd_n_f64_u64(uint64_t a, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ucvtf",
      "function_en": "[scalar] ucvtf [64]",
      "function_cn": "[标量] 无符号整数转为双精度浮点数 [64]"
    },
    {
      "name": "vcvt_f16_f32",
      "full name": "float16x4_t vcvt_f16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtn",
      "function_en": "[vector] fcvtn [32]",
      "function_cn": "[向量] float32_t类型转为float16_t类型 [32]"
    },
    {
      "name": "vcvt_high_f16_f32",
      "full name": "float16x8_t vcvt_high_f16_f32(float16x4_t r, float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtn2",
      "function_en": "[vector] fcvtn2 [32]",
      "function_cn": "[向量] float32_t类型转为float16_t类型，写入目标寄存器的高半部分 [32]"
    },
    {
      "name": "vcvt_f32_f64",
      "full name": "float32x2_t vcvt_f32_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtn",
      "function_en": "[vector] fcvtn [64]",
      "function_cn": "[向量] float64_t类型转为float32_t类型 [64]"
    },
    {
      "name": "vcvt_high_f32_f64",
      "full name": "float32x4_t vcvt_high_f32_f64(float32x2_t r, float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtn2",
      "function_en": "[vector] fcvtn2 [64]",
      "function_cn": "[向量] float64_t类型转为float32_t类型，写入高半部分 [64]"
    },
    {
      "name": "vcvt_f32_f16",
      "full name": "float32x4_t vcvt_f32_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtl",
      "function_en": "[vector] fcvtl [16]",
      "function_cn": "[向量] float16_t类型转为float32_t类型 [16]"
    },
    {
      "name": "vcvt_high_f32_f16",
      "full name": "float32x4_t vcvt_high_f32_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtl2",
      "function_en": "[vector] fcvtl2 [16]",
      "function_cn": "[向量] float16_t类型转为float32_t类型，取源寄存器的高半部分 [16]"
    },
    {
      "name": "vcvt_f64_f32",
      "full name": "float64x2_t vcvt_f64_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtl",
      "function_en": "[vector] fcvtl [32]",
      "function_cn": "[向量] float32_t类型转为float64_t类型 [32]"
    },
    {
      "name": "vcvt_high_f64_f32",
      "full name": "float64x2_t vcvt_high_f64_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtl2",
      "function_en": "[vector] fcvtl2 [32]",
      "function_cn": "[向量] float32_t类型转为float64_t类型，取源寄存器的高半部分 [32]"
    },
    {
      "name": "vcvtx_f32_f64",
      "full name": "float32x2_t vcvtx_f32_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtxn",
      "function_en": "[vector] fcvtxn [64]",
      "function_cn": "[向量] float64_t类型转为float32_t类型，向奇数舍入 [64]"
    },
    {
      "name": "vcvtxd_f32_f64",
      "full name": "float32_t vcvtxd_f32_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtxn",
      "function_en": "[scalar] fcvtxn [64]",
      "function_cn": "[标量] float64_t类型转为float32_t类型，向奇数舍入 [64]"
    },
    {
      "name": "vcvtx_high_f32_f64",
      "full name": "float32x4_t vcvtx_high_f32_f64(float32x2_t r, float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fcvtxn2",
      "function_en": "[vector] fcvtxn2 [64]",
      "function_cn": "[向量] float64_t类型转为float32_t类型，向奇数舍入，写入目标寄存器的高半部分 [64]"
    },
    {
      "name": "vrnd_f32",
      "full name": "float32x2_t vrnd_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintz",
      "function_en": "[vector] frintz [32]",
      "function_cn": "[向量] 浮点数向零舍入 [32]"
    },
    {
      "name": "vrndq_f32",
      "full name": "float32x4_t vrndq_f32(float32x4_t a)",
      "Intel name": "_mm_round_ps",
      "Intel Asm": "roundps",
      "Arm Asm": "frintz",
      "function_en": "[vector] frintz [32]",
      "function_cn": "[向量] 浮点数向零舍入 [32]"
    },
    {
      "name": "vrnd_f64",
      "full name": "float64x1_t vrnd_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintz",
      "function_en": "[vector] frintz [64]",
      "function_cn": "[向量] 浮点数向零舍入 [64]"
    },
    {
      "name": "vrndq_f64",
      "full name": "float64x2_t vrndq_f64(float64x2_t a)",
      "Intel name": "_mm_round_pd",
      "Intel Asm": "roundpd",
      "Arm Asm": "frintz",
      "function_en": "[vector] frintz [64]",
      "function_cn": "[向量] 浮点数向零舍入 [64]"
    },
    {
      "name": "vrndn_f32",
      "full name": "float32x2_t vrndn_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintn",
      "function_en": "[vector] frintn [32]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vrndnq_f32",
      "full name": "float32x4_t vrndnq_f32(float32x4_t a)",
      "Intel name": "_mm_round_ps",
      "Intel Asm": "roundps",
      "Arm Asm": "frintn",
      "function_en": "[vector] frintn [32]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vrndn_f64",
      "full name": "float64x1_t vrndn_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintn",
      "function_en": "[vector] frintn [64]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vrndnq_f64",
      "full name": "float64x2_t vrndnq_f64(float64x2_t a)",
      "Intel name": "_mm_round_pd",
      "Intel Asm": "roundpd",
      "Arm Asm": "frintn",
      "function_en": "[vector] frintn [64]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时向偶数舍入 [64]"
    },
    {
      "name": "vrndns_f32",
      "full name": "float32_t vrndns_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintn",
      "function_en": "[scalar] frintn [32]",
      "function_cn": "[标量] 浮点数四舍五入到最近的值，数字落在中间时向偶数舍入 [32]"
    },
    {
      "name": "vrndm_f32",
      "full name": "float32x2_t vrndm_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintm",
      "function_en": "[vector] frintm [32]",
      "function_cn": "[向量] 浮点数向负无穷舍入 [32]"
    },
    {
      "name": "vrndmq_f32",
      "full name": "float32x4_t vrndmq_f32(float32x4_t a)",
      "Intel name": "_mm_round_ps",
      "Intel Asm": "roundps",
      "Arm Asm": "frintm",
      "function_en": "[vector] frintm [32]",
      "function_cn": "[向量] 浮点数向负无穷舍入 [32]"
    },
    {
      "name": "vrndm_f64",
      "full name": "float64x1_t vrndm_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintm",
      "function_en": "[vector] frintm [64]",
      "function_cn": "[向量] 浮点数向负无穷舍入 [64]"
    },
    {
      "name": "vrndmq_f64",
      "full name": "float64x2_t vrndmq_f64(float64x2_t a)",
      "Intel name": "_mm_round_pd",
      "Intel Asm": "roundpd",
      "Arm Asm": "frintm",
      "function_en": "[vector] frintm [64]",
      "function_cn": "[向量] 浮点数向负无穷舍入 [64]"
    },
    {
      "name": "vrndp_f32",
      "full name": "float32x2_t vrndp_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintp",
      "function_en": "[vector] frintp [32]",
      "function_cn": "[向量] 浮点数向正无穷舍入 [32]"
    },
    {
      "name": "vrndpq_f32",
      "full name": "float32x4_t vrndpq_f32(float32x4_t a)",
      "Intel name": "_mm_round_ps",
      "Intel Asm": "roundps",
      "Arm Asm": "frintp",
      "function_en": "[vector] frintp [32]",
      "function_cn": "[向量] 浮点数向正无穷舍入 [32]"
    },
    {
      "name": "vrndp_f64",
      "full name": "float64x1_t vrndp_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintp",
      "function_en": "[vector] frintp [64]",
      "function_cn": "[向量] 浮点数向正无穷舍入 [64]"
    },
    {
      "name": "vrndpq_f64",
      "full name": "float64x2_t vrndpq_f64(float64x2_t a)",
      "Intel name": "_mm_round_pd",
      "Intel Asm": "roundpd",
      "Arm Asm": "frintp",
      "function_en": "[vector] frintp [64]",
      "function_cn": "[向量] 浮点数向正无穷舍入 [64]"
    },
    {
      "name": "vrnda_f32",
      "full name": "float32x2_t vrnda_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinta",
      "function_en": "[vector] frinta [32]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vrndaq_f32",
      "full name": "float32x4_t vrndaq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinta",
      "function_en": "[vector] frinta [32]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时远离零舍入 [32]"
    },
    {
      "name": "vrnda_f64",
      "full name": "float64x1_t vrnda_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinta",
      "function_en": "[vector] frinta [64]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vrndaq_f64",
      "full name": "float64x2_t vrndaq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinta",
      "function_en": "[vector] frinta [64]",
      "function_cn": "[向量] 浮点数四舍五入到最近的值，数字落在中间时远离零舍入 [64]"
    },
    {
      "name": "vrndi_f32",
      "full name": "float32x2_t vrndi_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinti",
      "function_en": "[vector] frinti [32]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式 [32]"
    },
    {
      "name": "vrndiq_f32",
      "full name": "float32x4_t vrndiq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinti",
      "function_en": "[vector] frinti [32]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式 [32]"
    },
    {
      "name": "vrndi_f64",
      "full name": "float64x1_t vrndi_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinti",
      "function_en": "[vector] frinti [64]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式 [64]"
    },
    {
      "name": "vrndiq_f64",
      "full name": "float64x2_t vrndiq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frinti",
      "function_en": "[vector] frinti [64]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式 [64]"
    },
    {
      "name": "vrndx_f32",
      "full name": "float32x2_t vrndx_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintx",
      "function_en": "[vector] frintx [32]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式，结果与输入值不相等时触发不精确(Inexact)异常 [32]"
    },
    {
      "name": "vrndxq_f32",
      "full name": "float32x4_t vrndxq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintx",
      "function_en": "[vector] frintx [32]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式，结果与输入值不相等时触发不精确(Inexact)异常 [32]"
    },
    {
      "name": "vrndx_f64",
      "full name": "float64x1_t vrndx_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintx",
      "function_en": "[vector] frintx [64]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式，结果与输入值不相等时触发不精确(Inexact)异常 [64]"
    },
    {
      "name": "vrndxq_f64",
      "full name": "float64x2_t vrndxq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frintx",
      "function_en": "[vector] frintx [64]",
      "function_cn": "[向量] 浮点数舍入，使用当前FPCR(浮点控制寄存器)中设置的舍入模式，结果与输入值不相等时触发不精确(Inexact)异常 [64]"
    },
    {
      "name": "vmovn_s16",
      "full name": "int8x8_t vmovn_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [16]",
      "function_cn": "[向量] 提取低位部分 [16]"
    },
    {
      "name": "vmovn_s32",
      "full name": "int16x4_t vmovn_s32(int32x4_t a)",
      "Intel name": "_mm_cvtepi32_epi16",
      "Intel Asm": "vpmovdw",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [32]",
      "function_cn": "[向量] 提取低位部分 [32]"
    },
    {
      "name": "vmovn_s64",
      "full name": "int32x2_t vmovn_s64(int64x2_t a)",
      "Intel name": "_mm_cvtepi64_epi32",
      "Intel Asm": "vpmovqd",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [64]",
      "function_cn": "[向量] 提取低位部分 [64]"
    },
    {
      "name": "vmovn_u16",
      "full name": "uint8x8_t vmovn_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [16]",
      "function_cn": "[向量] 提取低位部分 [16]"
    },
    {
      "name": "vmovn_u32",
      "full name": "uint16x4_t vmovn_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [32]",
      "function_cn": "[向量] 提取低位部分 [32]"
    },
    {
      "name": "vmovn_u64",
      "full name": "uint32x2_t vmovn_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn",
      "function_en": "[vector] xtn [64]",
      "function_cn": "[向量] 提取低位部分 [64]"
    },
    {
      "name": "vmovn_high_s16",
      "full name": "int8x16_t vmovn_high_s16(int8x8_t r, int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [16]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [16]"
    },
    {
      "name": "vmovn_high_s32",
      "full name": "int16x8_t vmovn_high_s32(int16x4_t r, int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [32]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [32]"
    },
    {
      "name": "vmovn_high_s64",
      "full name": "int32x4_t vmovn_high_s64(int32x2_t r, int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [64]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [64]"
    },
    {
      "name": "vmovn_high_u16",
      "full name": "uint8x16_t vmovn_high_u16(uint8x8_t r, uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [16]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [16]"
    },
    {
      "name": "vmovn_high_u32",
      "full name": "uint16x8_t vmovn_high_u32(uint16x4_t r, uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [32]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [32]"
    },
    {
      "name": "vmovn_high_u64",
      "full name": "uint32x4_t vmovn_high_u64(uint32x2_t r, uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "xtn2",
      "function_en": "[vector] xtn2 [64]",
      "function_cn": "[向量] 提取每个元素的低位部分，写入目标寄存器的高半部分 [64]"
    },
    {
      "name": "vmovl_s8",
      "full name": "int16x8_t vmovl_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll",
      "function_en": "[vector] sshll [8]",
      "function_cn": "[向量] 左移0位 [8]"
    },
    {
      "name": "vmovl_s16",
      "full name": "int32x4_t vmovl_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll",
      "function_en": "[vector] sshll [16]",
      "function_cn": "[向量] 左移0位 [16]"
    },
    {
      "name": "vmovl_s32",
      "full name": "int64x2_t vmovl_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll",
      "function_en": "[vector] sshll [32]",
      "function_cn": "[向量] 左移0位 [32]"
    },
    {
      "name": "vmovl_u8",
      "full name": "uint16x8_t vmovl_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll",
      "function_en": "[vector] ushll [8]",
      "function_cn": "[向量] 左移0位 [8]"
    },
    {
      "name": "vmovl_u16",
      "full name": "uint32x4_t vmovl_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll",
      "function_en": "[vector] ushll [16]",
      "function_cn": "[向量] 左移0位 [16]"
    },
    {
      "name": "vmovl_u32",
      "full name": "uint64x2_t vmovl_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll",
      "function_en": "[vector] ushll [32]",
      "function_cn": "[向量] 左移0位 [32]"
    },
    {
      "name": "vmovl_high_s8",
      "full name": "int16x8_t vmovl_high_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll2",
      "function_en": "[vector] sshll2 [8]",
      "function_cn": "[向量] 高位部分左移0位 [8]"
    },
    {
      "name": "vmovl_high_s16",
      "full name": "int32x4_t vmovl_high_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll2",
      "function_en": "[vector] sshll2 [16]",
      "function_cn": "[向量] 高位部分左移0位 [16]"
    },
    {
      "name": "vmovl_high_s32",
      "full name": "int64x2_t vmovl_high_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sshll2",
      "function_en": "[vector] sshll2 [32]",
      "function_cn": "[向量] 高位部分左移0位 [32]"
    },
    {
      "name": "vmovl_high_u8",
      "full name": "uint16x8_t vmovl_high_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll2",
      "function_en": "[vector] ushll2 [8]",
      "function_cn": "[向量] 高位部分左移0位 [8]"
    },
    {
      "name": "vmovl_high_u16",
      "full name": "uint32x4_t vmovl_high_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll2",
      "function_en": "[vector] ushll2 [16]",
      "function_cn": "[向量] 高位部分左移0位 [16]"
    },
    {
      "name": "vmovl_high_u32",
      "full name": "uint64x2_t vmovl_high_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ushll2",
      "function_en": "[vector] ushll2 [32]",
      "function_cn": "[向量] 高位部分左移0位 [32]"
    },
    {
      "name": "vqmovn_s16",
      "full name": "int8x8_t vqmovn_s16(int16x8_t a)",
      "Intel name": "_mm_cvtsepi16_epi8",
      "Intel Asm": "vpmovswb",
      "Arm Asm": "sqxtn",
      "function_en": "[vector] sqxtn [16]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [16]"
    },
    {
      "name": "vqmovn_s32",
      "full name": "int16x4_t vqmovn_s32(int32x4_t a)",
      "Intel name": "_mm_cvtsepi32_epi16",
      "Intel Asm": "vpmovsdw",
      "Arm Asm": "sqxtn",
      "function_en": "[vector] sqxtn [32]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [32]"
    },
    {
      "name": "vqmovn_s64",
      "full name": "int32x2_t vqmovn_s64(int64x2_t a)",
      "Intel name": "_mm_cvtsepi64_epi32",
      "Intel Asm": "vpmovsqd",
      "Arm Asm": "sqxtn",
      "function_en": "[vector] sqxtn [64]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [64]"
    },
    {
      "name": "vqmovn_u16",
      "full name": "uint8x8_t vqmovn_u16(uint16x8_t a)",
      "Intel name": "_mm_cvtusepi16_epi8",
      "Intel Asm": "vpmovuswb",
      "Arm Asm": "uqxtn",
      "function_en": "[vector] uqxtn [16]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [16]"
    },
    {
      "name": "vqmovn_u32",
      "full name": "uint16x4_t vqmovn_u32(uint32x4_t a)",
      "Intel name": "_mm_cvtusepi32_epi16",
      "Intel Asm": "vpmovusdw",
      "Arm Asm": "uqxtn",
      "function_en": "[vector] uqxtn [32]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [32]"
    },
    {
      "name": "vqmovn_u64",
      "full name": "uint32x2_t vqmovn_u64(uint64x2_t a)",
      "Intel name": "_mm_cvtusepi64_epi32",
      "Intel Asm": "vpmovusqd",
      "Arm Asm": "uqxtn",
      "function_en": "[vector] uqxtn [64]",
      "function_cn": "[向量] 饱和提取每个元素的低位部分 [64]"
    },
    {
      "name": "vqmovnh_s16",
      "full name": "int8_t vqmovnh_s16(int16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn",
      "function_en": "[scalar] sqxtn [16]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [16]"
    },
    {
      "name": "vqmovns_s32",
      "full name": "int16_t vqmovns_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn",
      "function_en": "[scalar] sqxtn [32]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [32]"
    },
    {
      "name": "vqmovnd_s64",
      "full name": "int32_t vqmovnd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn",
      "function_en": "[scalar] sqxtn [64]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [64]"
    },
    {
      "name": "vqmovnh_u16",
      "full name": "uint8_t vqmovnh_u16(uint16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn",
      "function_en": "[scalar] uqxtn [16]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [16]"
    },
    {
      "name": "vqmovns_u32",
      "full name": "uint16_t vqmovns_u32(uint32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn",
      "function_en": "[scalar] uqxtn [32]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [32]"
    },
    {
      "name": "vqmovnd_u64",
      "full name": "uint32_t vqmovnd_u64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn",
      "function_en": "[scalar] uqxtn [64]",
      "function_cn": "[标量] 饱和提取每个元素的低位部分 [64]"
    },
    {
      "name": "vqmovn_high_s16",
      "full name": "int8x16_t vqmovn_high_s16(int8x8_t r, int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn2",
      "function_en": "[vector] sqxtn2 [16]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [16]"
    },
    {
      "name": "vqmovn_high_s32",
      "full name": "int16x8_t vqmovn_high_s32(int16x4_t r, int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn2",
      "function_en": "[vector] sqxtn2 [32]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [32]"
    },
    {
      "name": "vqmovn_high_s64",
      "full name": "int32x4_t vqmovn_high_s64(int32x2_t r, int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtn2",
      "function_en": "[vector] sqxtn2 [64]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [64]"
    },
    {
      "name": "vqmovn_high_u16",
      "full name": "uint8x16_t vqmovn_high_u16(uint8x8_t r, uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn2",
      "function_en": "[vector] uqxtn2 [16]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [16]"
    },
    {
      "name": "vqmovn_high_u32",
      "full name": "uint16x8_t vqmovn_high_u32(uint16x4_t r, uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn2",
      "function_en": "[vector] uqxtn2 [32]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [32]"
    },
    {
      "name": "vqmovn_high_u64",
      "full name": "uint32x4_t vqmovn_high_u64(uint32x2_t r, uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uqxtn2",
      "function_en": "[vector] uqxtn2 [64]",
      "function_cn": "[向量] 饱和提取向量a中每个元素的高位部分，写入目标寄存器的高半部分 [64]"
    },
    {
      "name": "vqmovun_s16",
      "full name": "uint8x8_t vqmovun_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[vector] sqxtun [16]",
      "function_cn": "[向量] 有符号整数饱和提取低半部分为无符号整数 [16]"
    },
    {
      "name": "vqmovun_s32",
      "full name": "uint16x4_t vqmovun_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[vector] sqxtun [32]",
      "function_cn": "[向量] 有符号整数饱和提取低半部分为无符号整数 [32]"
    },
    {
      "name": "vqmovun_s64",
      "full name": "uint32x2_t vqmovun_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[vector] sqxtun [64]",
      "function_cn": "[向量] 有符号整数饱和提取低半部分为无符号整数 [64]"
    },
    {
      "name": "vqmovunh_s16",
      "full name": "uint8_t vqmovunh_s16(int16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[scalar] sqxtun [16]",
      "function_cn": "[标量] 有符号整数饱和提取低半部分为无符号整数 [16]"
    },
    {
      "name": "vqmovuns_s32",
      "full name": "uint16_t vqmovuns_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[scalar] sqxtun [32]",
      "function_cn": "[标量] 有符号整数饱和提取低半部分为无符号整数 [32]"
    },
    {
      "name": "vqmovund_s64",
      "full name": "uint32_t vqmovund_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun",
      "function_en": "[scalar] sqxtun [64]",
      "function_cn": "[标量] 有符号整数饱和提取低半部分为无符号整数 [64]"
    },
    {
      "name": "vqmovun_high_s16",
      "full name": "uint8x16_t vqmovun_high_s16(uint8x8_t r, int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun2",
      "function_en": "[vector] sqxtun2 [16]",
      "function_cn": "[向量] 有符号整数饱和提取高半部分为无符号整数，写入目标存储器的高半部分 [16]"
    },
    {
      "name": "vqmovun_high_s32",
      "full name": "uint16x8_t vqmovun_high_s32(uint16x4_t r, int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun2",
      "function_en": "[vector] sqxtun2 [32]",
      "function_cn": "[向量] 有符号整数饱和提取高半部分为无符号整数，写入目标存储器的高半部分 [32]"
    },
    {
      "name": "vqmovun_high_s64",
      "full name": "uint32x4_t vqmovun_high_s64(uint32x2_t r, int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqxtun2",
      "function_en": "[vector] sqxtun2 [64]",
      "function_cn": "[向量] 有符号整数饱和提取高半部分为无符号整数，写入目标存储器的高半部分 [64]"
    },
    {
      "name": "vmla_lane_s16",
      "full name": "int16x4_t vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlaq_lane_s16",
      "full name": "int16x8_t vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmla_lane_s32",
      "full name": "int32x2_t vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_lane_s32",
      "full name": "int32x4_t vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmla_lane_u16",
      "full name": "uint16x4_t vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlaq_lane_u16",
      "full name": "uint16x8_t vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmla_lane_u32",
      "full name": "uint32x2_t vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_lane_u32",
      "full name": "uint32x4_t vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmla_lane_f32",
      "full name": "float32x2_t vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_lane_f32",
      "full name": "float32x4_t vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmla_laneq_s16",
      "full name": "int16x4_t vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlaq_laneq_s16",
      "full name": "int16x8_t vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmla_laneq_s32",
      "full name": "int32x2_t vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_laneq_s32",
      "full name": "int32x4_t vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmla_laneq_u16",
      "full name": "uint16x4_t vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlaq_laneq_u16",
      "full name": "uint16x8_t vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmla_laneq_u32",
      "full name": "uint32x2_t vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_laneq_u32",
      "full name": "uint32x4_t vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmla_laneq_f32",
      "full name": "float32x2_t vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlaq_laneq_f32",
      "full name": "float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_lane_s16",
      "full name": "int32x4_t vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_lane_s32",
      "full name": "int64x2_t vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_lane_u16",
      "full name": "uint32x4_t vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_lane_u32",
      "full name": "uint64x2_t vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_high_lane_s16",
      "full name": "int32x4_t vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_high_lane_s32",
      "full name": "int64x2_t vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_high_lane_u16",
      "full name": "uint32x4_t vmlal_high_lane_u16(uint32x4_t a, uint16x8_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_high_lane_u32",
      "full name": "uint64x2_t vmlal_high_lane_u32(uint64x2_t a, uint32x4_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_laneq_s16",
      "full name": "int32x4_t vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_laneq_s32",
      "full name": "int64x2_t vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_laneq_u16",
      "full name": "uint32x4_t vmlal_laneq_u16(uint32x4_t a, uint16x4_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_laneq_u32",
      "full name": "uint64x2_t vmlal_laneq_u32(uint64x2_t a, uint32x2_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_high_laneq_s16",
      "full name": "int32x4_t vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_high_laneq_s32",
      "full name": "int64x2_t vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [32]"
    },
    {
      "name": "vmlal_high_laneq_u16",
      "full name": "uint32x4_t vmlal_high_laneq_u16(uint32x4_t a, uint16x8_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [16]"
    },
    {
      "name": "vmlal_high_laneq_u32",
      "full name": "uint64x2_t vmlal_high_laneq_u32(uint64x2_t a, uint32x4_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a [32]"
    },
    {
      "name": "vqdmlal_lane_s16",
      "full name": "int32x4_t vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中的每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlal_lane_s32",
      "full name": "int64x2_t vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [32]"
    },
    {
      "name": "vqdmlalh_lane_s16",
      "full name": "int32_t vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [16]",
      "function_cn": "[标量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlals_lane_s32",
      "full name": "int64_t vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [32]",
      "function_cn": "[标量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [32]"
    },
    {
      "name": "vqdmlal_high_lane_s16",
      "full name": "int32x4_t vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlal_high_lane_s32",
      "full name": "int64x2_t vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a(饱和运算) [32]"
    },
    {
      "name": "vqdmlal_laneq_s16",
      "full name": "int32x4_t vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlal_laneq_s32",
      "full name": "int64x2_t vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlalh_laneq_s16",
      "full name": "int32_t vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [16]",
      "function_cn": "[标量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlals_laneq_s32",
      "full name": "int64_t vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[scalar] sqdmlal [32]",
      "function_cn": "[标量] 向量v的第lane个元素分别乘以b中每个元素，再加上a(饱和运算) [32]"
    },
    {
      "name": "vqdmlal_high_laneq_s16",
      "full name": "int32x4_t vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [16]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a(饱和运算) [16]"
    },
    {
      "name": "vqdmlal_high_laneq_s32",
      "full name": "int64x2_t vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [32]",
      "function_cn": "[向量] 向量v的第lane个元素分别乘以b的高半部分中每个元素，再加上a(饱和运算) [32]"
    },
    {
      "name": "vmls_lane_s16",
      "full name": "int16x4_t vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_lane_s16",
      "full name": "int16x8_t vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_lane_s32",
      "full name": "int32x2_t vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_lane_s32",
      "full name": "int32x4_t vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_lane_u16",
      "full name": "uint16x4_t vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_lane_u16",
      "full name": "uint16x8_t vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_lane_u32",
      "full name": "uint32x2_t vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_lane_u32",
      "full name": "uint32x4_t vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_lane_f32",
      "full name": "float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点数乘减 [32]"
    },
    {
      "name": "vmlsq_lane_f32",
      "full name": "float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点数乘减 [32]"
    },
    {
      "name": "vmls_laneq_s16",
      "full name": "int16x4_t vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_laneq_s16",
      "full name": "int16x8_t vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_laneq_s32",
      "full name": "int32x2_t vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_laneq_s32",
      "full name": "int32x4_t vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_laneq_u16",
      "full name": "uint16x4_t vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_laneq_u16",
      "full name": "uint16x8_t vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_laneq_u32",
      "full name": "uint32x2_t vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_laneq_u32",
      "full name": "uint32x4_t vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_laneq_f32",
      "full name": "float32x2_t vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点数乘减 [32]"
    },
    {
      "name": "vmlsq_laneq_f32",
      "full name": "float32x4_t vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点数乘减 [32]"
    },
    {
      "name": "vmlsl_lane_s16",
      "full name": "int32x4_t vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_lane_s32",
      "full name": "int64x2_t vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_lane_u16",
      "full name": "uint32x4_t vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_lane_u32",
      "full name": "uint64x2_t vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_high_lane_s16",
      "full name": "int32x4_t vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_lane_s32",
      "full name": "int64x2_t vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vmlsl_high_lane_u16",
      "full name": "uint32x4_t vmlsl_high_lane_u16(uint32x4_t a, uint16x8_t b, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_lane_u32",
      "full name": "uint64x2_t vmlsl_high_lane_u32(uint64x2_t a, uint32x4_t b, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vmlsl_laneq_s16",
      "full name": "int32x4_t vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_laneq_s32",
      "full name": "int64x2_t vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_laneq_u16",
      "full name": "uint32x4_t vmlsl_laneq_u16(uint32x4_t a, uint16x4_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_laneq_u32",
      "full name": "uint64x2_t vmlsl_laneq_u32(uint64x2_t a, uint32x2_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_high_laneq_s16",
      "full name": "int32x4_t vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_laneq_s32",
      "full name": "int64x2_t vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vmlsl_high_laneq_u16",
      "full name": "uint32x4_t vmlsl_high_laneq_u16(uint32x4_t a, uint16x8_t b, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_laneq_u32",
      "full name": "uint64x2_t vmlsl_high_laneq_u32(uint64x2_t a, uint32x4_t b, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vqdmlsl_lane_s16",
      "full name": "int32x4_t vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [16]",
      "function_cn": "[向量] 饱和乘减 [16]"
    },
    {
      "name": "vqdmlsl_lane_s32",
      "full name": "int64x2_t vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [32]",
      "function_cn": "[向量] 饱和乘减 [32]"
    },
    {
      "name": "vqdmlslh_lane_s16",
      "full name": "int32_t vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [16]",
      "function_cn": "[标量] 饱和乘减 [16]"
    },
    {
      "name": "vqdmlsls_lane_s32",
      "full name": "int64_t vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [32]",
      "function_cn": "[标量] 饱和乘减 [32]"
    },
    {
      "name": "vqdmlsl_high_lane_s16",
      "full name": "int32x4_t vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [16]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [16]"
    },
    {
      "name": "vqdmlsl_high_lane_s32",
      "full name": "int64x2_t vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [32]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [32]"
    },
    {
      "name": "vqdmlsl_laneq_s16",
      "full name": "int32x4_t vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [16]",
      "function_cn": "[向量] 饱和乘减 [16]"
    },
    {
      "name": "vqdmlsl_laneq_s32",
      "full name": "int64x2_t vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [32]",
      "function_cn": "[向量] 饱和乘减 [32]"
    },
    {
      "name": "vqdmlslh_laneq_s16",
      "full name": "int32_t vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [16]",
      "function_cn": "[标量] 饱和乘减 [16]"
    },
    {
      "name": "vqdmlsls_laneq_s32",
      "full name": "int64_t vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[scalar] sqdmlsl [32]",
      "function_cn": "[标量] 饱和乘减 [32]"
    },
    {
      "name": "vqdmlsl_high_laneq_s16",
      "full name": "int32x4_t vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [16]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [16]"
    },
    {
      "name": "vqdmlsl_high_laneq_s32",
      "full name": "int64x2_t vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [32]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [32]"
    },
    {
      "name": "vmul_n_s16",
      "full name": "int16x4_t vmul_n_s16(int16x4_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmulq_n_s16",
      "full name": "int16x8_t vmulq_n_s16(int16x8_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmul_n_s32",
      "full name": "int32x2_t vmul_n_s32(int32x2_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulq_n_s32",
      "full name": "int32x4_t vmulq_n_s32(int32x4_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmul_n_u16",
      "full name": "uint16x4_t vmul_n_u16(uint16x4_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmulq_n_u16",
      "full name": "uint16x8_t vmulq_n_u16(uint16x8_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmul_n_u32",
      "full name": "uint32x2_t vmul_n_u32(uint32x2_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmulq_n_u32",
      "full name": "uint32x4_t vmulq_n_u32(uint32x4_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmul_n_f32",
      "full name": "float32x2_t vmul_n_f32(float32x2_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 浮点数乘 [32]"
    },
    {
      "name": "vmulq_n_f32",
      "full name": "float32x4_t vmulq_n_f32(float32x4_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 浮点数乘 [32]"
    },
    {
      "name": "vmul_n_f64",
      "full name": "float64x1_t vmul_n_f64(float64x1_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 浮点数乘 [64]"
    },
    {
      "name": "vmulq_n_f64",
      "full name": "float64x2_t vmulq_n_f64(float64x2_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 浮点数乘 [64]"
    },
    {
      "name": "vmul_lane_s16",
      "full name": "int16x4_t vmul_lane_s16(int16x4_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmulq_lane_s16",
      "full name": "int16x8_t vmulq_lane_s16(int16x8_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmul_lane_s32",
      "full name": "int32x2_t vmul_lane_s32(int32x2_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_lane_s32",
      "full name": "int32x4_t vmulq_lane_s32(int32x4_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_lane_u16",
      "full name": "uint16x4_t vmul_lane_u16(uint16x4_t a, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmulq_lane_u16",
      "full name": "uint16x8_t vmulq_lane_u16(uint16x8_t a, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmul_lane_u32",
      "full name": "uint32x2_t vmul_lane_u32(uint32x2_t a, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_lane_u32",
      "full name": "uint32x4_t vmulq_lane_u32(uint32x4_t a, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_lane_f32",
      "full name": "float32x2_t vmul_lane_f32(float32x2_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_lane_f32",
      "full name": "float32x4_t vmulq_lane_f32(float32x4_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_lane_f64",
      "full name": "float64x1_t vmul_lane_f64(float64x1_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmulq_lane_f64",
      "full name": "float64x2_t vmulq_lane_f64(float64x2_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmuls_lane_f32",
      "full name": "float32_t vmuls_lane_f32(float32_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[scalar] fmul [32]",
      "function_cn": "[标量] 标量a乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmuld_lane_f64",
      "full name": "float64_t vmuld_lane_f64(float64_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[scalar] fmul [64]",
      "function_cn": "[标量] 标量a乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmul_laneq_s16",
      "full name": "int16x4_t vmul_laneq_s16(int16x4_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmulq_laneq_s16",
      "full name": "int16x8_t vmulq_laneq_s16(int16x8_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmul_laneq_s32",
      "full name": "int32x2_t vmul_laneq_s32(int32x2_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_laneq_s32",
      "full name": "int32x4_t vmulq_laneq_s32(int32x4_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_laneq_u16",
      "full name": "uint16x4_t vmul_laneq_u16(uint16x4_t a, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmulq_laneq_u16",
      "full name": "uint16x8_t vmulq_laneq_u16(uint16x8_t a, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [16]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [16]"
    },
    {
      "name": "vmul_laneq_u32",
      "full name": "uint32x2_t vmul_laneq_u32(uint32x2_t a, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_laneq_u32",
      "full name": "uint32x4_t vmulq_laneq_u32(uint32x4_t a, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mul",
      "function_en": "[vector] mul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_laneq_f32",
      "full name": "float32x2_t vmul_laneq_f32(float32x2_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmulq_laneq_f32",
      "full name": "float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [32]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmul_laneq_f64",
      "full name": "float64x1_t vmul_laneq_f64(float64x1_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmulq_laneq_f64",
      "full name": "float64x2_t vmulq_laneq_f64(float64x2_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[vector] fmul [64]",
      "function_cn": "[向量] 向量a中的元素分别乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmuls_laneq_f32",
      "full name": "float32_t vmuls_laneq_f32(float32_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[scalar] fmul [32]",
      "function_cn": "[标量] 标量a乘以向量v的第lane个元素 [32]"
    },
    {
      "name": "vmuld_laneq_f64",
      "full name": "float64_t vmuld_laneq_f64(float64_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmul",
      "function_en": "[scalar] fmul [64]",
      "function_cn": "[标量] 标量a乘以向量v的第lane个元素 [64]"
    },
    {
      "name": "vmull_n_s16",
      "full name": "int32x4_t vmull_n_s16(int16x4_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_n_s32",
      "full name": "int64x2_t vmull_n_s32(int32x2_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_n_u16",
      "full name": "uint32x4_t vmull_n_u16(uint16x4_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_n_u32",
      "full name": "uint64x2_t vmull_n_u32(uint32x2_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_high_n_s16",
      "full name": "int32x4_t vmull_high_n_s16(int16x8_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_n_s32",
      "full name": "int64x2_t vmull_high_n_s32(int32x4_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vmull_high_n_u16",
      "full name": "uint32x4_t vmull_high_n_u16(uint16x8_t a, uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_n_u32",
      "full name": "uint64x2_t vmull_high_n_u32(uint32x4_t a, uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vmull_lane_s16",
      "full name": "int32x4_t vmull_lane_s16(int16x4_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_lane_s32",
      "full name": "int64x2_t vmull_lane_s32(int32x2_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_lane_u16",
      "full name": "uint32x4_t vmull_lane_u16(uint16x4_t a, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_lane_u32",
      "full name": "uint64x2_t vmull_lane_u32(uint32x2_t a, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_high_lane_s16",
      "full name": "int32x4_t vmull_high_lane_s16(int16x8_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_lane_s32",
      "full name": "int64x2_t vmull_high_lane_s32(int32x4_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vmull_high_lane_u16",
      "full name": "uint32x4_t vmull_high_lane_u16(uint16x8_t a, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_lane_u32",
      "full name": "uint64x2_t vmull_high_lane_u32(uint32x4_t a, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vmull_laneq_s16",
      "full name": "int32x4_t vmull_laneq_s16(int16x4_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_laneq_s32",
      "full name": "int64x2_t vmull_laneq_s32(int32x2_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull",
      "function_en": "[vector] smull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_laneq_u16",
      "full name": "uint32x4_t vmull_laneq_u16(uint16x4_t a, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [16]",
      "function_cn": "[向量] 乘 [16]"
    },
    {
      "name": "vmull_laneq_u32",
      "full name": "uint64x2_t vmull_laneq_u32(uint32x2_t a, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull",
      "function_en": "[vector] umull [32]",
      "function_cn": "[向量] 乘 [32]"
    },
    {
      "name": "vmull_high_laneq_s16",
      "full name": "int32x4_t vmull_high_laneq_s16(int16x8_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_laneq_s32",
      "full name": "int64x2_t vmull_high_laneq_s32(int32x4_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smull2",
      "function_en": "[vector] smull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vmull_high_laneq_u16",
      "full name": "uint32x4_t vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [16]",
      "function_cn": "[向量] 乘(a选取高半部分) [16]"
    },
    {
      "name": "vmull_high_laneq_u32",
      "full name": "uint64x2_t vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umull2",
      "function_en": "[vector] umull2 [32]",
      "function_cn": "[向量] 乘(a选取高半部分) [32]"
    },
    {
      "name": "vqdmull_n_s16",
      "full name": "int32x4_t vqdmull_n_s16(int16x4_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [16]",
      "function_cn": "[向量] 饱和乘 [16]"
    },
    {
      "name": "vqdmull_n_s32",
      "full name": "int64x2_t vqdmull_n_s32(int32x2_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [32]",
      "function_cn": "[向量] 饱和乘 [32]"
    },
    {
      "name": "vqdmull_high_n_s16",
      "full name": "int32x4_t vqdmull_high_n_s16(int16x8_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [16]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [16]"
    },
    {
      "name": "vqdmull_high_n_s32",
      "full name": "int64x2_t vqdmull_high_n_s32(int32x4_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [32]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [32]"
    },
    {
      "name": "vqdmull_lane_s16",
      "full name": "int32x4_t vqdmull_lane_s16(int16x4_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [16]",
      "function_cn": "[向量] 饱和乘 [16]"
    },
    {
      "name": "vqdmull_lane_s32",
      "full name": "int64x2_t vqdmull_lane_s32(int32x2_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [32]",
      "function_cn": "[向量] 饱和乘 [32]"
    },
    {
      "name": "vqdmullh_lane_s16",
      "full name": "int32_t vqdmullh_lane_s16(int16_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [16]",
      "function_cn": "[标量] 饱和乘 [16]"
    },
    {
      "name": "vqdmulls_lane_s32",
      "full name": "int64_t vqdmulls_lane_s32(int32_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [32]",
      "function_cn": "[标量] 饱和乘 [32]"
    },
    {
      "name": "vqdmull_high_lane_s16",
      "full name": "int32x4_t vqdmull_high_lane_s16(int16x8_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [16]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [16]"
    },
    {
      "name": "vqdmull_high_lane_s32",
      "full name": "int64x2_t vqdmull_high_lane_s32(int32x4_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [32]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [32]"
    },
    {
      "name": "vqdmull_laneq_s16",
      "full name": "int32x4_t vqdmull_laneq_s16(int16x4_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [16]",
      "function_cn": "[向量] 饱和乘 [16]"
    },
    {
      "name": "vqdmull_laneq_s32",
      "full name": "int64x2_t vqdmull_laneq_s32(int32x2_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[vector] sqdmull [32]",
      "function_cn": "[向量] 饱和乘 [32]"
    },
    {
      "name": "vqdmullh_laneq_s16",
      "full name": "int32_t vqdmullh_laneq_s16(int16_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [16]",
      "function_cn": "[标量] 饱和乘 [16]"
    },
    {
      "name": "vqdmulls_laneq_s32",
      "full name": "int64_t vqdmulls_laneq_s32(int32_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull",
      "function_en": "[scalar] sqdmull [32]",
      "function_cn": "[标量] 饱和乘 [32]"
    },
    {
      "name": "vqdmull_high_laneq_s16",
      "full name": "int32x4_t vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [16]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [16]"
    },
    {
      "name": "vqdmull_high_laneq_s32",
      "full name": "int64x2_t vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmull2",
      "function_en": "[vector] sqdmull2 [32]",
      "function_cn": "[向量] 饱和乘(a选取高半部分) [32]"
    },
    {
      "name": "vqdmulh_n_s16",
      "full name": "int16x4_t vqdmulh_n_s16(int16x4_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulhq_n_s16",
      "full name": "int16x8_t vqdmulhq_n_s16(int16x8_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulh_n_s32",
      "full name": "int32x2_t vqdmulh_n_s32(int32x2_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulhq_n_s32",
      "full name": "int32x4_t vqdmulhq_n_s32(int32x4_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulh_lane_s16",
      "full name": "int16x4_t vqdmulh_lane_s16(int16x4_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulhq_lane_s16",
      "full name": "int16x8_t vqdmulhq_lane_s16(int16x8_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulh_lane_s32",
      "full name": "int32x2_t vqdmulh_lane_s32(int32x2_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulhq_lane_s32",
      "full name": "int32x4_t vqdmulhq_lane_s32(int32x4_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulhh_lane_s16",
      "full name": "int16_t vqdmulhh_lane_s16(int16_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [16]",
      "function_cn": "[标量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulhs_lane_s32",
      "full name": "int32_t vqdmulhs_lane_s32(int32_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [32]",
      "function_cn": "[标量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulh_laneq_s16",
      "full name": "int16x4_t vqdmulh_laneq_s16(int16x4_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulhq_laneq_s16",
      "full name": "int16x8_t vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulh_laneq_s32",
      "full name": "int32x2_t vqdmulh_laneq_s32(int32x2_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulhq_laneq_s32",
      "full name": "int32x4_t vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[vector] sqdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqdmulhh_laneq_s16",
      "full name": "int16_t vqdmulhh_laneq_s16(int16_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [16]",
      "function_cn": "[标量] 相乘再乘以2，结果截断式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqdmulhs_laneq_s32",
      "full name": "int32_t vqdmulhs_laneq_s32(int32_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmulh",
      "function_en": "[scalar] sqdmulh [32]",
      "function_cn": "[标量] 相乘再乘以2，结果截断式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulh_n_s16",
      "full name": "int16x4_t vqrdmulh_n_s16(int16x4_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulhq_n_s16",
      "full name": "int16x8_t vqrdmulhq_n_s16(int16x8_t a, int16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulh_n_s32",
      "full name": "int32x2_t vqrdmulh_n_s32(int32x2_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulhq_n_s32",
      "full name": "int32x4_t vqrdmulhq_n_s32(int32x4_t a, int32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulh_lane_s16",
      "full name": "int16x4_t vqrdmulh_lane_s16(int16x4_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulhq_lane_s16",
      "full name": "int16x8_t vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulh_lane_s32",
      "full name": "int32x2_t vqrdmulh_lane_s32(int32x2_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulhq_lane_s32",
      "full name": "int32x4_t vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulhh_lane_s16",
      "full name": "int16_t vqrdmulhh_lane_s16(int16_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [16]",
      "function_cn": "[标量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulhs_lane_s32",
      "full name": "int32_t vqrdmulhs_lane_s32(int32_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [32]",
      "function_cn": "[标量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulh_laneq_s16",
      "full name": "int16x4_t vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulhq_laneq_s16",
      "full name": "int16x8_t vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [16]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulh_laneq_s32",
      "full name": "int32x2_t vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulhq_laneq_s32",
      "full name": "int32x4_t vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[vector] sqrdmulh [32]",
      "function_cn": "[向量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vqrdmulhh_laneq_s16",
      "full name": "int16_t vqrdmulhh_laneq_s16(int16_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [16]",
      "function_cn": "[标量] 相乘再乘以2，结果舍入式取高16位(饱和运算) [16]"
    },
    {
      "name": "vqrdmulhs_laneq_s32",
      "full name": "int32_t vqrdmulhs_laneq_s32(int32_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqrdmulh",
      "function_en": "[scalar] sqrdmulh [32]",
      "function_cn": "[标量] 相乘再乘以2，结果舍入式取高32位(饱和运算) [32]"
    },
    {
      "name": "vmla_n_s16",
      "full name": "int16x4_t vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlaq_n_s16",
      "full name": "int16x8_t vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmla_n_s32",
      "full name": "int32x2_t vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_n_s32",
      "full name": "int32x4_t vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmla_n_u16",
      "full name": "uint16x4_t vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlaq_n_u16",
      "full name": "uint16x8_t vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmla_n_u32",
      "full name": "uint32x2_t vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_n_u32",
      "full name": "uint32x4_t vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mla",
      "function_en": "[vector] mla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmla_n_f32",
      "full name": "float32x2_t vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlaq_n_f32",
      "full name": "float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_n_s16",
      "full name": "int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_n_s32",
      "full name": "int64x2_t vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal",
      "function_en": "[vector] smlal [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_n_u16",
      "full name": "uint32x4_t vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [16]",
      "function_cn": "[向量] 乘加 [16]"
    },
    {
      "name": "vmlal_n_u32",
      "full name": "uint64x2_t vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal",
      "function_en": "[vector] umlal [32]",
      "function_cn": "[向量] 乘加 [32]"
    },
    {
      "name": "vmlal_high_n_s16",
      "full name": "int32x4_t vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [16]",
      "function_cn": "[向量] 乘加(b选取高半部分) [16]"
    },
    {
      "name": "vmlal_high_n_s32",
      "full name": "int64x2_t vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlal2",
      "function_en": "[vector] smlal2 [32]",
      "function_cn": "[向量] 乘加(b选取高半部分) [32]"
    },
    {
      "name": "vmlal_high_n_u16",
      "full name": "uint32x4_t vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [16]",
      "function_cn": "[向量] 乘加(b选取高半部分) [16]"
    },
    {
      "name": "vmlal_high_n_u32",
      "full name": "uint64x2_t vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlal2",
      "function_en": "[vector] umlal2 [32]",
      "function_cn": "[向量] 乘加(b选取高半部分) [32]"
    },
    {
      "name": "vqdmlal_n_s16",
      "full name": "int32x4_t vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [16]",
      "function_cn": "[向量] 饱和乘加 [16]"
    },
    {
      "name": "vqdmlal_n_s32",
      "full name": "int64x2_t vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal",
      "function_en": "[vector] sqdmlal [32]",
      "function_cn": "[向量] 饱和乘加 [32]"
    },
    {
      "name": "vqdmlal_high_n_s16",
      "full name": "int32x4_t vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [16]",
      "function_cn": "[向量] 饱和乘加(b选取高半部分) [16]"
    },
    {
      "name": "vqdmlal_high_n_s32",
      "full name": "int64x2_t vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlal2",
      "function_en": "[vector] sqdmlal2 [32]",
      "function_cn": "[向量] 饱和乘加(b选取高半部分) [32]"
    },
    {
      "name": "vmls_n_s16",
      "full name": "int16x4_t vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_n_s16",
      "full name": "int16x8_t vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_n_s32",
      "full name": "int32x2_t vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_n_s32",
      "full name": "int32x4_t vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_n_u16",
      "full name": "uint16x4_t vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsq_n_u16",
      "full name": "uint16x8_t vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmls_n_u32",
      "full name": "uint32x2_t vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_n_u32",
      "full name": "uint32x4_t vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mls",
      "function_en": "[vector] mls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmls_n_f32",
      "full name": "float32x2_t vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsq_n_f32",
      "full name": "float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_n_s16",
      "full name": "int32x4_t vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_n_s32",
      "full name": "int64x2_t vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl",
      "function_en": "[vector] smlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_n_u16",
      "full name": "uint32x4_t vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [16]",
      "function_cn": "[向量] 乘减 [16]"
    },
    {
      "name": "vmlsl_n_u32",
      "full name": "uint64x2_t vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl",
      "function_en": "[vector] umlsl [32]",
      "function_cn": "[向量] 乘减 [32]"
    },
    {
      "name": "vmlsl_high_n_s16",
      "full name": "int32x4_t vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_n_s32",
      "full name": "int64x2_t vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smlsl2",
      "function_en": "[vector] smlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vmlsl_high_n_u16",
      "full name": "uint32x4_t vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [16]",
      "function_cn": "[向量] 乘减(b选取高半部分) [16]"
    },
    {
      "name": "vmlsl_high_n_u32",
      "full name": "uint64x2_t vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umlsl2",
      "function_en": "[vector] umlsl2 [32]",
      "function_cn": "[向量] 乘减(b选取高半部分) [32]"
    },
    {
      "name": "vqdmlsl_n_s16",
      "full name": "int32x4_t vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [16]",
      "function_cn": "[向量] 饱和乘减 [16]"
    },
    {
      "name": "vqdmlsl_n_s32",
      "full name": "int64x2_t vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl",
      "function_en": "[vector] sqdmlsl [32]",
      "function_cn": "[向量] 饱和乘减 [32]"
    },
    {
      "name": "vqdmlsl_high_n_s16",
      "full name": "int32x4_t vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [16]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [16]"
    },
    {
      "name": "vqdmlsl_high_n_s32",
      "full name": "int64x2_t vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqdmlsl2",
      "function_en": "[vector] sqdmlsl2 [32]",
      "function_cn": "[向量] 饱和乘减(b选取高半部分) [32]"
    },
    {
      "name": "vabs_s8",
      "full name": "int8x8_t vabs_s8(int8x8_t a)",
      "Intel name": "_mm_abs_pi8",
      "Intel Asm": "pabsb",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [8]",
      "function_cn": "[向量] 取绝对值 [8]"
    },
    {
      "name": "vabsq_s8",
      "full name": "int8x16_t vabsq_s8(int8x16_t a)",
      "Intel name": "_mm_abs_epi8",
      "Intel Asm": "pabsb",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [8]",
      "function_cn": "[向量] 取绝对值 [8]"
    },
    {
      "name": "vabs_s16",
      "full name": "int16x4_t vabs_s16(int16x4_t a)",
      "Intel name": "_mm_abs_pi16",
      "Intel Asm": "pabsw",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [16]",
      "function_cn": "[向量] 取绝对值 [16]"
    },
    {
      "name": "vabsq_s16",
      "full name": "int16x8_t vabsq_s16(int16x8_t a)",
      "Intel name": "_mm_abs_epi16",
      "Intel Asm": "pabsw",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [16]",
      "function_cn": "[向量] 取绝对值 [16]"
    },
    {
      "name": "vabs_s32",
      "full name": "int32x2_t vabs_s32(int32x2_t a)",
      "Intel name": "_mm_abs_pi32",
      "Intel Asm": "pabsd",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [32]",
      "function_cn": "[向量] 取绝对值 [32]"
    },
    {
      "name": "vabsq_s32",
      "full name": "int32x4_t vabsq_s32(int32x4_t a)",
      "Intel name": "_mm_abs_epi32",
      "Intel Asm": "pabsd",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [32]",
      "function_cn": "[向量] 取绝对值 [32]"
    },
    {
      "name": "vabs_f32",
      "full name": "float32x2_t vabs_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabs",
      "function_en": "[vector] fabs [32]",
      "function_cn": "[向量] 取绝对值 [32]"
    },
    {
      "name": "vabsq_f32",
      "full name": "float32x4_t vabsq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabs",
      "function_en": "[vector] fabs [32]",
      "function_cn": "[向量] 取绝对值 [32]"
    },
    {
      "name": "vabs_s64",
      "full name": "int64x1_t vabs_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [64]",
      "function_cn": "[向量] 取绝对值 [64]"
    },
    {
      "name": "vabsd_s64",
      "full name": "int64_t vabsd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "abs",
      "function_en": "[scalar] abs [64]",
      "function_cn": "[标量] 取绝对值 [64]"
    },
    {
      "name": "vabsq_s64",
      "full name": "int64x2_t vabsq_s64(int64x2_t a)",
      "Intel name": "_mm_abs_epi64",
      "Intel Asm": "vpabsq",
      "Arm Asm": "abs",
      "function_en": "[vector] abs [64]",
      "function_cn": "[向量] 取绝对值 [64]"
    },
    {
      "name": "vabs_f64",
      "full name": "float64x1_t vabs_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabs",
      "function_en": "[vector] fabs [64]",
      "function_cn": "[向量] 取绝对值 [64]"
    },
    {
      "name": "vabsq_f64",
      "full name": "float64x2_t vabsq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fabs",
      "function_en": "[vector] fabs [64]",
      "function_cn": "[向量] 取绝对值 [64]"
    },
    {
      "name": "vqabs_s8",
      "full name": "int8x8_t vqabs_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [8]",
      "function_cn": "[向量] 饱和取绝对值 [8]"
    },
    {
      "name": "vqabsq_s8",
      "full name": "int8x16_t vqabsq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [8]",
      "function_cn": "[向量] 饱和取绝对值 [8]"
    },
    {
      "name": "vqabs_s16",
      "full name": "int16x4_t vqabs_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [16]",
      "function_cn": "[向量] 饱和取绝对值 [16]"
    },
    {
      "name": "vqabsq_s16",
      "full name": "int16x8_t vqabsq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [16]",
      "function_cn": "[向量] 饱和取绝对值 [16]"
    },
    {
      "name": "vqabs_s32",
      "full name": "int32x2_t vqabs_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [32]",
      "function_cn": "[向量] 饱和取绝对值 [32]"
    },
    {
      "name": "vqabsq_s32",
      "full name": "int32x4_t vqabsq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [32]",
      "function_cn": "[向量] 饱和取绝对值 [32]"
    },
    {
      "name": "vqabs_s64",
      "full name": "int64x1_t vqabs_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [64]",
      "function_cn": "[向量] 饱和取绝对值 [64]"
    },
    {
      "name": "vqabsq_s64",
      "full name": "int64x2_t vqabsq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[vector] sqabs [64]",
      "function_cn": "[向量] 饱和取绝对值 [64]"
    },
    {
      "name": "vqabsb_s8",
      "full name": "int8_t vqabsb_s8(int8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[scalar] sqabs [8]",
      "function_cn": "[标量] 饱和取绝对值 [8]"
    },
    {
      "name": "vqabsh_s16",
      "full name": "int16_t vqabsh_s16(int16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[scalar] sqabs [16]",
      "function_cn": "[标量] 饱和取绝对值 [16]"
    },
    {
      "name": "vqabss_s32",
      "full name": "int32_t vqabss_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[scalar] sqabs [32]",
      "function_cn": "[标量] 饱和取绝对值 [32]"
    },
    {
      "name": "vqabsd_s64",
      "full name": "int64_t vqabsd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqabs",
      "function_en": "[scalar] sqabs [64]",
      "function_cn": "[标量] 饱和取绝对值 [64]"
    },
    {
      "name": "vneg_s8",
      "full name": "int8x8_t vneg_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [8]",
      "function_cn": "[向量] 符号取反 [8]"
    },
    {
      "name": "vnegq_s8",
      "full name": "int8x16_t vnegq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [8]",
      "function_cn": "[向量] 符号取反 [8]"
    },
    {
      "name": "vneg_s16",
      "full name": "int16x4_t vneg_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [16]",
      "function_cn": "[向量] 符号取反 [16]"
    },
    {
      "name": "vnegq_s16",
      "full name": "int16x8_t vnegq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [16]",
      "function_cn": "[向量] 符号取反 [16]"
    },
    {
      "name": "vneg_s32",
      "full name": "int32x2_t vneg_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [32]",
      "function_cn": "[向量] 符号取反 [32]"
    },
    {
      "name": "vnegq_s32",
      "full name": "int32x4_t vnegq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [32]",
      "function_cn": "[向量] 符号取反 [32]"
    },
    {
      "name": "vneg_f32",
      "full name": "float32x2_t vneg_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fneg",
      "function_en": "[vector] fneg [32]",
      "function_cn": "[向量] 符号取反 [32]"
    },
    {
      "name": "vnegq_f32",
      "full name": "float32x4_t vnegq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fneg",
      "function_en": "[vector] fneg [32]",
      "function_cn": "[向量] 符号取反 [32]"
    },
    {
      "name": "vneg_s64",
      "full name": "int64x1_t vneg_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [64]",
      "function_cn": "[向量] 符号取反 [64]"
    },
    {
      "name": "vnegd_s64",
      "full name": "int64_t vnegd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[scalar] neg [64]",
      "function_cn": "[标量] 符号取反 [64]"
    },
    {
      "name": "vnegq_s64",
      "full name": "int64x2_t vnegq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "neg",
      "function_en": "[vector] neg [64]",
      "function_cn": "[向量] 符号取反 [64]"
    },
    {
      "name": "vneg_f64",
      "full name": "float64x1_t vneg_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fneg",
      "function_en": "[vector] fneg [64]",
      "function_cn": "[向量] 符号取反 [64]"
    },
    {
      "name": "vnegq_f64",
      "full name": "float64x2_t vnegq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fneg",
      "function_en": "[vector] fneg [64]",
      "function_cn": "[向量] 符号取反 [64]"
    },
    {
      "name": "vqneg_s8",
      "full name": "int8x8_t vqneg_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [8]",
      "function_cn": "[向量] 饱和符号取反 [8]"
    },
    {
      "name": "vqnegq_s8",
      "full name": "int8x16_t vqnegq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [8]",
      "function_cn": "[向量] 饱和符号取反 [8]"
    },
    {
      "name": "vqneg_s16",
      "full name": "int16x4_t vqneg_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [16]",
      "function_cn": "[向量] 饱和符号取反 [16]"
    },
    {
      "name": "vqnegq_s16",
      "full name": "int16x8_t vqnegq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [16]",
      "function_cn": "[向量] 饱和符号取反 [16]"
    },
    {
      "name": "vqneg_s32",
      "full name": "int32x2_t vqneg_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [32]",
      "function_cn": "[向量] 饱和符号取反 [32]"
    },
    {
      "name": "vqnegq_s32",
      "full name": "int32x4_t vqnegq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [32]",
      "function_cn": "[向量] 饱和符号取反 [32]"
    },
    {
      "name": "vqneg_s64",
      "full name": "int64x1_t vqneg_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [64]",
      "function_cn": "[向量] 饱和符号取反 [64]"
    },
    {
      "name": "vqnegq_s64",
      "full name": "int64x2_t vqnegq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[vector] sqneg [64]",
      "function_cn": "[向量] 饱和符号取反 [64]"
    },
    {
      "name": "vqnegb_s8",
      "full name": "int8_t vqnegb_s8(int8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[scalar] sqneg [8]",
      "function_cn": "[标量] 饱和符号取反 [8]"
    },
    {
      "name": "vqnegh_s16",
      "full name": "int16_t vqnegh_s16(int16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[scalar] sqneg [16]",
      "function_cn": "[标量] 饱和符号取反 [16]"
    },
    {
      "name": "vqnegs_s32",
      "full name": "int32_t vqnegs_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[scalar] sqneg [32]",
      "function_cn": "[标量] 饱和符号取反 [32]"
    },
    {
      "name": "vqnegd_s64",
      "full name": "int64_t vqnegd_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sqneg",
      "function_en": "[scalar] sqneg [64]",
      "function_cn": "[标量] 饱和符号取反 [64]"
    },
    {
      "name": "vcls_s8",
      "full name": "int8x8_t vcls_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [8]",
      "function_cn": "[向量] 前导符号位计数 [8]"
    },
    {
      "name": "vclsq_s8",
      "full name": "int8x16_t vclsq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [8]",
      "function_cn": "[向量] 前导符号位计数 [8]"
    },
    {
      "name": "vcls_s16",
      "full name": "int16x4_t vcls_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [16]",
      "function_cn": "[向量] 前导符号位计数 [16]"
    },
    {
      "name": "vclsq_s16",
      "full name": "int16x8_t vclsq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [16]",
      "function_cn": "[向量] 前导符号位计数 [16]"
    },
    {
      "name": "vcls_s32",
      "full name": "int32x2_t vcls_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [32]",
      "function_cn": "[向量] 前导符号位计数 [32]"
    },
    {
      "name": "vclsq_s32",
      "full name": "int32x4_t vclsq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cls",
      "function_en": "[vector] cls [32]",
      "function_cn": "[向量] 前导符号位计数 [32]"
    },
    {
      "name": "vclz_s8",
      "full name": "int8x8_t vclz_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [8]",
      "function_cn": "[向量] 前导零计数 [8]"
    },
    {
      "name": "vclzq_s8",
      "full name": "int8x16_t vclzq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [8]",
      "function_cn": "[向量] 前导零计数 [8]"
    },
    {
      "name": "vclz_s16",
      "full name": "int16x4_t vclz_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [16]",
      "function_cn": "[向量] 前导零计数 [16]"
    },
    {
      "name": "vclzq_s16",
      "full name": "int16x8_t vclzq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [16]",
      "function_cn": "[向量] 前导零计数 [16]"
    },
    {
      "name": "vclz_s32",
      "full name": "int32x2_t vclz_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [32]",
      "function_cn": "[向量] 前导零计数 [32]"
    },
    {
      "name": "vclzq_s32",
      "full name": "int32x4_t vclzq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [32]",
      "function_cn": "[向量] 前导零计数 [32]"
    },
    {
      "name": "vclz_u8",
      "full name": "uint8x8_t vclz_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [8]",
      "function_cn": "[向量] 前导零计数 [8]"
    },
    {
      "name": "vclzq_u8",
      "full name": "uint8x16_t vclzq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [8]",
      "function_cn": "[向量] 前导零计数 [8]"
    },
    {
      "name": "vclz_u16",
      "full name": "uint16x4_t vclz_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [16]",
      "function_cn": "[向量] 前导零计数 [16]"
    },
    {
      "name": "vclzq_u16",
      "full name": "uint16x8_t vclzq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [16]",
      "function_cn": "[向量] 前导零计数 [16]"
    },
    {
      "name": "vclz_u32",
      "full name": "uint32x2_t vclz_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [32]",
      "function_cn": "[向量] 前导零计数 [32]"
    },
    {
      "name": "vclzq_u32",
      "full name": "uint32x4_t vclzq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "clz",
      "function_en": "[vector] clz [32]",
      "function_cn": "[向量] 前导零计数 [32]"
    },
    {
      "name": "vcnt_s8",
      "full name": "int8x8_t vcnt_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vcntq_s8",
      "full name": "int8x16_t vcntq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vcnt_u8",
      "full name": "uint8x8_t vcnt_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vcntq_u8",
      "full name": "uint8x16_t vcntq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vcnt_p8",
      "full name": "poly8x8_t vcnt_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vcntq_p8",
      "full name": "poly8x16_t vcntq_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "cnt",
      "function_en": "[vector] cnt [8]",
      "function_cn": "[向量] 设置位计数 [8]"
    },
    {
      "name": "vrecpe_u32",
      "full name": "uint32x2_t vrecpe_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urecpe",
      "function_en": "[vector] urecpe [32]",
      "function_cn": "[向量] 求近似逆 [32]"
    },
    {
      "name": "vrecpeq_u32",
      "full name": "uint32x4_t vrecpeq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "urecpe",
      "function_en": "[vector] urecpe [32]",
      "function_cn": "[向量] 求近似逆 [32]"
    },
    {
      "name": "vrecpe_f32",
      "full name": "float32x2_t vrecpe_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpe",
      "function_en": "[vector] frecpe [32]",
      "function_cn": "[向量] 求近似逆 [32]"
    },
    {
      "name": "vrecpeq_f32",
      "full name": "float32x4_t vrecpeq_f32(float32x4_t a)",
      "Intel name": "_mm_rcp_ps",
      "Intel Asm": "rcpps",
      "Arm Asm": "frecpe",
      "function_en": "[vector] frecpe [32]",
      "function_cn": "[向量] 求近似逆 [32]"
    },
    {
      "name": "vrecpe_f64",
      "full name": "float64x1_t vrecpe_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpe",
      "function_en": "[vector] frecpe [64]",
      "function_cn": "[向量] 求近似逆 [64]"
    },
    {
      "name": "vrecpeq_f64",
      "full name": "float64x2_t vrecpeq_f64(float64x2_t a)",
      "Intel name": "_mm_rcp14_pd",
      "Intel Asm": "vrcp14pd",
      "Arm Asm": "frecpe",
      "function_en": "[vector] frecpe [64]",
      "function_cn": "[向量] 求近似逆 [64]"
    },
    {
      "name": "vrecpes_f32",
      "full name": "float32_t vrecpes_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpe",
      "function_en": "[scalar] frecpe [32]",
      "function_cn": "[标量] 求近似逆 [32]"
    },
    {
      "name": "vrecped_f64",
      "full name": "float64_t vrecped_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpe",
      "function_en": "[scalar] frecpe [64]",
      "function_cn": "[标量] 求近似逆 [64]"
    },
    {
      "name": "vrecps_f32",
      "full name": "float32x2_t vrecps_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[vector] frecps [32]",
      "function_cn": "[向量] 2.0减去对应元素的乘积 [32]"
    },
    {
      "name": "vrecpsq_f32",
      "full name": "float32x4_t vrecpsq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[vector] frecps [32]",
      "function_cn": "[向量] 2.0减去对应元素的乘积 [32]"
    },
    {
      "name": "vrecps_f64",
      "full name": "float64x1_t vrecps_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[vector] frecps [64]",
      "function_cn": "[向量] 2.0减去对应元素的乘积 [64]"
    },
    {
      "name": "vrecpsq_f64",
      "full name": "float64x2_t vrecpsq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[vector] frecps [64]",
      "function_cn": "[向量] 2.0减去对应元素的乘积 [64]"
    },
    {
      "name": "vrecpss_f32",
      "full name": "float32_t vrecpss_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[scalar] frecps [32]",
      "function_cn": "[标量] 2.0减去对应元素的乘积 [32]"
    },
    {
      "name": "vrecpsd_f64",
      "full name": "float64_t vrecpsd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecps",
      "function_en": "[scalar] frecps [64]",
      "function_cn": "[标量] 2.0减去对应元素的乘积 [64]"
    },
    {
      "name": "vsqrt_f32",
      "full name": "float32x2_t vsqrt_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fsqrt",
      "function_en": "[vector] fsqrt [32]",
      "function_cn": "[向量] 计算平方根 [32]"
    },
    {
      "name": "vsqrtq_f32",
      "full name": "float32x4_t vsqrtq_f32(float32x4_t a)",
      "Intel name": "_mm_sqrt_ps",
      "Intel Asm": "sqrtps",
      "Arm Asm": "fsqrt",
      "function_en": "[vector] fsqrt [32]",
      "function_cn": "[向量] 计算平方根 [32]"
    },
    {
      "name": "vsqrt_f64",
      "full name": "float64x1_t vsqrt_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fsqrt",
      "function_en": "[vector] fsqrt [64]",
      "function_cn": "[向量] 计算平方根 [64]"
    },
    {
      "name": "vsqrtq_f64",
      "full name": "float64x2_t vsqrtq_f64(float64x2_t a)",
      "Intel name": "_mm_sqrt_pd",
      "Intel Asm": "sqrtpd",
      "Arm Asm": "fsqrt",
      "function_en": "[vector] fsqrt [64]",
      "function_cn": "[向量] 计算平方根 [64]"
    },
    {
      "name": "vrsqrte_u32",
      "full name": "uint32x2_t vrsqrte_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursqrte",
      "function_en": "[vector] ursqrte [32]",
      "function_cn": "[向量] 计算平方根倒数 [32]"
    },
    {
      "name": "vrsqrteq_u32",
      "full name": "uint32x4_t vrsqrteq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ursqrte",
      "function_en": "[vector] ursqrte [32]",
      "function_cn": "[向量] 计算平方根倒数 [32]"
    },
    {
      "name": "vrsqrte_f32",
      "full name": "float32x2_t vrsqrte_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrte",
      "function_en": "[vector] frsqrte [32]",
      "function_cn": "[向量] 计算平方根倒数 [32]"
    },
    {
      "name": "vrsqrteq_f32",
      "full name": "float32x4_t vrsqrteq_f32(float32x4_t a)",
      "Intel name": "_mm_rsqrt_ps",
      "Intel Asm": "rsqrtps",
      "Arm Asm": "frsqrte",
      "function_en": "[vector] frsqrte [32]",
      "function_cn": "[向量] 计算平方根倒数 [32]"
    },
    {
      "name": "vrsqrte_f64",
      "full name": "float64x1_t vrsqrte_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrte",
      "function_en": "[vector] frsqrte [64]",
      "function_cn": "[向量] 计算平方根倒数 [64]"
    },
    {
      "name": "vrsqrteq_f64",
      "full name": "float64x2_t vrsqrteq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrte",
      "function_en": "[vector] frsqrte [64]",
      "function_cn": "[向量] 计算平方根倒数 [64]"
    },
    {
      "name": "vrsqrtes_f32",
      "full name": "float32_t vrsqrtes_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrte",
      "function_en": "[scalar] frsqrte [32]",
      "function_cn": "[标量] 计算平方根倒数 [32]"
    },
    {
      "name": "vrsqrted_f64",
      "full name": "float64_t vrsqrted_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrte",
      "function_en": "[scalar] frsqrte [64]",
      "function_cn": "[标量] 计算平方根倒数 [64]"
    },
    {
      "name": "vrsqrts_f32",
      "full name": "float32x2_t vrsqrts_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[vector] frsqrts [32]",
      "function_cn": "[向量] 3.0减去对应元素的乘积再除以2.0 [32]"
    },
    {
      "name": "vrsqrtsq_f32",
      "full name": "float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[vector] frsqrts [32]",
      "function_cn": "[向量] 3.0减去对应元素的乘积再除以2.0 [32]"
    },
    {
      "name": "vrsqrts_f64",
      "full name": "float64x1_t vrsqrts_f64(float64x1_t a, float64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[vector] frsqrts [64]",
      "function_cn": "[向量] 3.0减去对应元素的乘积再除以2.0 [64]"
    },
    {
      "name": "vrsqrtsq_f64",
      "full name": "float64x2_t vrsqrtsq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[vector] frsqrts [64]",
      "function_cn": "[向量] 3.0减去对应元素的乘积再除以2.0 [64]"
    },
    {
      "name": "vrsqrtss_f32",
      "full name": "float32_t vrsqrtss_f32(float32_t a, float32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[scalar] frsqrts [32]",
      "function_cn": "[标量] 3.0减去对应元素的乘积再除以2.0 [32]"
    },
    {
      "name": "vrsqrtsd_f64",
      "full name": "float64_t vrsqrtsd_f64(float64_t a, float64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frsqrts",
      "function_en": "[scalar] frsqrts [64]",
      "function_cn": "[标量] 3.0减去对应元素的乘积再除以2.0 [64]"
    },
    {
      "name": "vmvn_s8",
      "full name": "int8x8_t vmvn_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vmvnq_s8",
      "full name": "int8x16_t vmvnq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vmvn_s16",
      "full name": "int16x4_t vmvn_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [16]",
      "function_cn": "[向量] 按位取反 [16]"
    },
    {
      "name": "vmvnq_s16",
      "full name": "int16x8_t vmvnq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [16]",
      "function_cn": "[向量] 按位取反 [16]"
    },
    {
      "name": "vmvn_s32",
      "full name": "int32x2_t vmvn_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [32]",
      "function_cn": "[向量] 按位取反 [32]"
    },
    {
      "name": "vmvnq_s32",
      "full name": "int32x4_t vmvnq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [32]",
      "function_cn": "[向量] 按位取反 [32]"
    },
    {
      "name": "vmvn_u8",
      "full name": "uint8x8_t vmvn_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vmvnq_u8",
      "full name": "uint8x16_t vmvnq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vmvn_u16",
      "full name": "uint16x4_t vmvn_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [16]",
      "function_cn": "[向量] 按位取反 [16]"
    },
    {
      "name": "vmvnq_u16",
      "full name": "uint16x8_t vmvnq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [16]",
      "function_cn": "[向量] 按位取反 [16]"
    },
    {
      "name": "vmvn_u32",
      "full name": "uint32x2_t vmvn_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [32]",
      "function_cn": "[向量] 按位取反 [32]"
    },
    {
      "name": "vmvnq_u32",
      "full name": "uint32x4_t vmvnq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [32]",
      "function_cn": "[向量] 按位取反 [32]"
    },
    {
      "name": "vmvn_p8",
      "full name": "poly8x8_t vmvn_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vmvnq_p8",
      "full name": "poly8x16_t vmvnq_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "mvn",
      "function_en": "[vector] mvn [8]",
      "function_cn": "[向量] 按位取反 [8]"
    },
    {
      "name": "vand_s8",
      "full name": "int8x8_t vand_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [8]",
      "function_cn": "[向量] 按位与 [8]"
    },
    {
      "name": "vandq_s8",
      "full name": "int8x16_t vandq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [8]",
      "function_cn": "[向量] 按位与 [8]"
    },
    {
      "name": "vand_s16",
      "full name": "int16x4_t vand_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [16]",
      "function_cn": "[向量] 按位与 [16]"
    },
    {
      "name": "vandq_s16",
      "full name": "int16x8_t vandq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [16]",
      "function_cn": "[向量] 按位与 [16]"
    },
    {
      "name": "vand_s32",
      "full name": "int32x2_t vand_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [32]",
      "function_cn": "[向量] 按位与 [32]"
    },
    {
      "name": "vandq_s32",
      "full name": "int32x4_t vandq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_and_epi32",
      "Intel Asm": "vpandd",
      "Arm Asm": "and",
      "function_en": "[vector] and [32]",
      "function_cn": "[向量] 按位与 [32]"
    },
    {
      "name": "vand_s64",
      "full name": "int64x1_t vand_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "_mm_and_si64",
      "Intel Asm": "pand",
      "Arm Asm": "and",
      "function_en": "[vector] and [64]",
      "function_cn": "[向量] 按位与 [64]"
    },
    {
      "name": "vandq_s64",
      "full name": "int64x2_t vandq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_and_epi64",
      "Intel Asm": "vpandq",
      "Arm Asm": "and",
      "function_en": "[vector] and [64]",
      "function_cn": "[向量] 按位与 [64]"
    },
    {
      "name": "vand_u8",
      "full name": "uint8x8_t vand_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [8]",
      "function_cn": "[向量] 按位与 [8]"
    },
    {
      "name": "vandq_u8",
      "full name": "uint8x16_t vandq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [8]",
      "function_cn": "[向量] 按位与 [8]"
    },
    {
      "name": "vand_u16",
      "full name": "uint16x4_t vand_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [16]",
      "function_cn": "[向量] 按位与 [16]"
    },
    {
      "name": "vandq_u16",
      "full name": "uint16x8_t vandq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [16]",
      "function_cn": "[向量] 按位与 [16]"
    },
    {
      "name": "vand_u32",
      "full name": "uint32x2_t vand_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [32]",
      "function_cn": "[向量] 按位与 [32]"
    },
    {
      "name": "vandq_u32",
      "full name": "uint32x4_t vandq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [32]",
      "function_cn": "[向量] 按位与 [32]"
    },
    {
      "name": "vand_u64",
      "full name": "uint64x1_t vand_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [64]",
      "function_cn": "[向量] 按位与 [64]"
    },
    {
      "name": "vandq_u64",
      "full name": "uint64x2_t vandq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "and",
      "function_en": "[vector] and [64]",
      "function_cn": "[向量] 按位与 [64]"
    },
    {
      "name": "vorr_s8",
      "full name": "int8x8_t vorr_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [8]",
      "function_cn": "[向量] 按位或 [8]"
    },
    {
      "name": "vorrq_s8",
      "full name": "int8x16_t vorrq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [8]",
      "function_cn": "[向量] 按位或 [8]"
    },
    {
      "name": "vorr_s16",
      "full name": "int16x4_t vorr_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [16]",
      "function_cn": "[向量] 按位或 [16]"
    },
    {
      "name": "vorrq_s16",
      "full name": "int16x8_t vorrq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [16]",
      "function_cn": "[向量] 按位或 [16]"
    },
    {
      "name": "vorr_s32",
      "full name": "int32x2_t vorr_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [32]",
      "function_cn": "[向量] 按位或 [32]"
    },
    {
      "name": "vorrq_s32",
      "full name": "int32x4_t vorrq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_or_epi32",
      "Intel Asm": "vpord",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [32]",
      "function_cn": "[向量] 按位或 [32]"
    },
    {
      "name": "vorr_s64",
      "full name": "int64x1_t vorr_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "_mm_or_si64",
      "Intel Asm": "por",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [64]",
      "function_cn": "[向量] 按位或 [64]"
    },
    {
      "name": "vorrq_s64",
      "full name": "int64x2_t vorrq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_or_epi64",
      "Intel Asm": "vporq",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [64]",
      "function_cn": "[向量] 按位或 [64]"
    },
    {
      "name": "vorr_u8",
      "full name": "uint8x8_t vorr_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [8]",
      "function_cn": "[向量] 按位或 [8]"
    },
    {
      "name": "vorrq_u8",
      "full name": "uint8x16_t vorrq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [8]",
      "function_cn": "[向量] 按位或 [8]"
    },
    {
      "name": "vorr_u16",
      "full name": "uint16x4_t vorr_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [16]",
      "function_cn": "[向量] 按位或 [16]"
    },
    {
      "name": "vorrq_u16",
      "full name": "uint16x8_t vorrq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [16]",
      "function_cn": "[向量] 按位或 [16]"
    },
    {
      "name": "vorr_u32",
      "full name": "uint32x2_t vorr_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [32]",
      "function_cn": "[向量] 按位或 [32]"
    },
    {
      "name": "vorrq_u32",
      "full name": "uint32x4_t vorrq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [32]",
      "function_cn": "[向量] 按位或 [32]"
    },
    {
      "name": "vorr_u64",
      "full name": "uint64x1_t vorr_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [64]",
      "function_cn": "[向量] 按位或 [64]"
    },
    {
      "name": "vorrq_u64",
      "full name": "uint64x2_t vorrq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orr",
      "function_en": "[vector] orr [64]",
      "function_cn": "[向量] 按位或 [64]"
    },
    {
      "name": "veor_s8",
      "full name": "int8x8_t veor_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [8]",
      "function_cn": "[向量] 按位异或 [8]"
    },
    {
      "name": "veorq_s8",
      "full name": "int8x16_t veorq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [8]",
      "function_cn": "[向量] 按位异或 [8]"
    },
    {
      "name": "veor_s16",
      "full name": "int16x4_t veor_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [16]",
      "function_cn": "[向量] 按位异或 [16]"
    },
    {
      "name": "veorq_s16",
      "full name": "int16x8_t veorq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [16]",
      "function_cn": "[向量] 按位异或 [16]"
    },
    {
      "name": "veor_s32",
      "full name": "int32x2_t veor_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [32]",
      "function_cn": "[向量] 按位异或 [32]"
    },
    {
      "name": "veorq_s32",
      "full name": "int32x4_t veorq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_xor_epi32",
      "Intel Asm": "vpxord",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [32]",
      "function_cn": "[向量] 按位异或 [32]"
    },
    {
      "name": "veor_s64",
      "full name": "int64x1_t veor_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "_mm_xor_si64",
      "Intel Asm": "pxor",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [64]",
      "function_cn": "[向量] 按位异或 [64]"
    },
    {
      "name": "veorq_s64",
      "full name": "int64x2_t veorq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_xor_epi64",
      "Intel Asm": "vpxorq",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [64]",
      "function_cn": "[向量] 按位异或 [64]"
    },
    {
      "name": "veor_u8",
      "full name": "uint8x8_t veor_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [8]",
      "function_cn": "[向量] 按位异或 [8]"
    },
    {
      "name": "veorq_u8",
      "full name": "uint8x16_t veorq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [8]",
      "function_cn": "[向量] 按位异或 [8]"
    },
    {
      "name": "veor_u16",
      "full name": "uint16x4_t veor_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [16]",
      "function_cn": "[向量] 按位异或 [16]"
    },
    {
      "name": "veorq_u16",
      "full name": "uint16x8_t veorq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [16]",
      "function_cn": "[向量] 按位异或 [16]"
    },
    {
      "name": "veor_u32",
      "full name": "uint32x2_t veor_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [32]",
      "function_cn": "[向量] 按位异或 [32]"
    },
    {
      "name": "veorq_u32",
      "full name": "uint32x4_t veorq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [32]",
      "function_cn": "[向量] 按位异或 [32]"
    },
    {
      "name": "veor_u64",
      "full name": "uint64x1_t veor_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [64]",
      "function_cn": "[向量] 按位异或 [64]"
    },
    {
      "name": "veorq_u64",
      "full name": "uint64x2_t veorq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "eor",
      "function_en": "[vector] eor [64]",
      "function_cn": "[向量] 按位异或 [64]"
    },
    {
      "name": "vbic_s8",
      "full name": "int8x8_t vbic_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [8]"
    },
    {
      "name": "vbicq_s8",
      "full name": "int8x16_t vbicq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [8]"
    },
    {
      "name": "vbic_s16",
      "full name": "int16x4_t vbic_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [8]"
    },
    {
      "name": "vbicq_s16",
      "full name": "int16x8_t vbicq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [16]"
    },
    {
      "name": "vbic_s32",
      "full name": "int32x2_t vbic_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [32]"
    },
    {
      "name": "vbicq_s32",
      "full name": "int32x4_t vbicq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm512_andnot_epi32",
      "Intel Asm": "vpandnd",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [32]"
    },
    {
      "name": "vbic_s64",
      "full name": "int64x1_t vbic_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "_mm_andnot_si64",
      "Intel Asm": "pandn",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [64]"
    },
    {
      "name": "vbicq_s64",
      "full name": "int64x2_t vbicq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm512_andnot_epi64",
      "Intel Asm": "vpandnq",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [64]"
    },
    {
      "name": "vbic_u8",
      "full name": "uint8x8_t vbic_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [8]"
    },
    {
      "name": "vbicq_u8",
      "full name": "uint8x16_t vbicq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [8]"
    },
    {
      "name": "vbic_u16",
      "full name": "uint16x4_t vbic_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [16]"
    },
    {
      "name": "vbicq_u16",
      "full name": "uint16x8_t vbicq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [16]"
    },
    {
      "name": "vbic_u32",
      "full name": "uint32x2_t vbic_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [32]"
    },
    {
      "name": "vbicq_u32",
      "full name": "uint32x4_t vbicq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [32]"
    },
    {
      "name": "vbic_u64",
      "full name": "uint64x1_t vbic_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [64]"
    },
    {
      "name": "vbicq_u64",
      "full name": "uint64x2_t vbicq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bic",
      "function_en": "[vector] bic [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位与 [64]"
    },
    {
      "name": "vorn_s8",
      "full name": "int8x8_t vorn_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [8]"
    },
    {
      "name": "vornq_s8",
      "full name": "int8x16_t vornq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [8]"
    },
    {
      "name": "vorn_s16",
      "full name": "int16x4_t vorn_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [16]"
    },
    {
      "name": "vornq_s16",
      "full name": "int16x8_t vornq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [16]"
    },
    {
      "name": "vorn_s32",
      "full name": "int32x2_t vorn_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [32]"
    },
    {
      "name": "vornq_s32",
      "full name": "int32x4_t vornq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [32]"
    },
    {
      "name": "vorn_s64",
      "full name": "int64x1_t vorn_s64(int64x1_t a, int64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [64]"
    },
    {
      "name": "vornq_s64",
      "full name": "int64x2_t vornq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [64]"
    },
    {
      "name": "vorn_u8",
      "full name": "uint8x8_t vorn_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [8]"
    },
    {
      "name": "vornq_u8",
      "full name": "uint8x16_t vornq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [8]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [8]"
    },
    {
      "name": "vorn_u16",
      "full name": "uint16x4_t vorn_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [16]"
    },
    {
      "name": "vornq_u16",
      "full name": "uint16x8_t vornq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [16]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [16]"
    },
    {
      "name": "vorn_u32",
      "full name": "uint32x2_t vorn_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [32]"
    },
    {
      "name": "vornq_u32",
      "full name": "uint32x4_t vornq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [32]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [32]"
    },
    {
      "name": "vorn_u64",
      "full name": "uint64x1_t vorn_u64(uint64x1_t a, uint64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [64]"
    },
    {
      "name": "vornq_u64",
      "full name": "uint64x2_t vornq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "orn",
      "function_en": "[vector] orn [64]",
      "function_cn": "[向量] 向量b中每个元素按位取反后和a中对应元素按位或 [64]"
    },
    {
      "name": "vbsl_s8",
      "full name": "int8x8_t vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbslq_s8",
      "full name": "int8x16_t vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbsl_s16",
      "full name": "int16x4_t vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbslq_s16",
      "full name": "int16x8_t vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbsl_s32",
      "full name": "int32x2_t vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbslq_s32",
      "full name": "int32x4_t vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbsl_s64",
      "full name": "int64x1_t vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbslq_s64",
      "full name": "int64x2_t vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbsl_u8",
      "full name": "uint8x8_t vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbslq_u8",
      "full name": "uint8x16_t vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbsl_u16",
      "full name": "uint16x4_t vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbslq_u16",
      "full name": "uint16x8_t vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbsl_u32",
      "full name": "uint32x2_t vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbslq_u32",
      "full name": "uint32x4_t vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbsl_u64",
      "full name": "uint64x1_t vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbslq_u64",
      "full name": "uint64x2_t vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbsl_p64",
      "full name": "poly64x1_t vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbslq_p64",
      "full name": "poly64x2_t vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbsl_f32",
      "full name": "float32x2_t vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbslq_f32",
      "full name": "float32x4_t vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [32]",
      "function_cn": "[向量] 按位选择 [32]"
    },
    {
      "name": "vbsl_p8",
      "full name": "poly8x8_t vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbslq_p8",
      "full name": "poly8x16_t vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [8]",
      "function_cn": "[向量] 按位选择 [8]"
    },
    {
      "name": "vbsl_p16",
      "full name": "poly16x4_t vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbslq_p16",
      "full name": "poly16x8_t vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [16]",
      "function_cn": "[向量] 按位选择 [16]"
    },
    {
      "name": "vbsl_f64",
      "full name": "float64x1_t vbsl_f64(uint64x1_t a, float64x1_t b, float64x1_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vbslq_f64",
      "full name": "float64x2_t vbslq_f64(uint64x2_t a, float64x2_t b, float64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "bsl",
      "function_en": "[vector] bsl [64]",
      "function_cn": "[向量] 按位选择 [64]"
    },
    {
      "name": "vcopy_lane_s8",
      "full name": "int8x8_t vcopy_lane_s8(int8x8_t a, const int lane1, int8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_lane_s8",
      "full name": "int8x16_t vcopyq_lane_s8(int8x16_t a, const int lane1, int8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_lane_s16",
      "full name": "int16x4_t vcopy_lane_s16(int16x4_t a, const int lane1, int16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_lane_s16",
      "full name": "int16x8_t vcopyq_lane_s16(int16x8_t a, const int lane1, int16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopy_lane_s32",
      "full name": "int32x2_t vcopy_lane_s32(int32x2_t a, const int lane1, int32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_lane_s32",
      "full name": "int32x4_t vcopyq_lane_s32(int32x4_t a, const int lane1, int32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_lane_s64",
      "full name": "int64x1_t vcopy_lane_s64(int64x1_t a, const int lane1, int64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_lane_s64",
      "full name": "int64x2_t vcopyq_lane_s64(int64x2_t a, const int lane1, int64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_lane_u8",
      "full name": "uint8x8_t vcopy_lane_u8(uint8x8_t a, const int lane1, uint8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_lane_u8",
      "full name": "uint8x16_t vcopyq_lane_u8(uint8x16_t a, const int lane1, uint8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_lane_u16",
      "full name": "uint16x4_t vcopy_lane_u16(uint16x4_t a, const int lane1, uint16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_lane_u16",
      "full name": "uint16x8_t vcopyq_lane_u16(uint16x8_t a, const int lane1, uint16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopy_lane_u32",
      "full name": "uint32x2_t vcopy_lane_u32(uint32x2_t a, const int lane1, uint32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_lane_u32",
      "full name": "uint32x4_t vcopyq_lane_u32(uint32x4_t a, const int lane1, uint32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_lane_u64",
      "full name": "uint64x1_t vcopy_lane_u64(uint64x1_t a, const int lane1, uint64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_lane_u64",
      "full name": "uint64x2_t vcopyq_lane_u64(uint64x2_t a, const int lane1, uint64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_lane_p64",
      "full name": "poly64x1_t vcopy_lane_p64(poly64x1_t a, const int lane1, poly64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_lane_p64",
      "full name": "poly64x2_t vcopyq_lane_p64(poly64x2_t a, const int lane1, poly64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_lane_f32",
      "full name": "float32x2_t vcopy_lane_f32(float32x2_t a, const int lane1, float32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_lane_f32",
      "full name": "float32x4_t vcopyq_lane_f32(float32x4_t a, const int lane1, float32x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_lane_f64",
      "full name": "float64x1_t vcopy_lane_f64(float64x1_t a, const int lane1, float64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_lane_f64",
      "full name": "float64x2_t vcopyq_lane_f64(float64x2_t a, const int lane1, float64x1_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_lane_p8",
      "full name": "poly8x8_t vcopy_lane_p8(poly8x8_t a, const int lane1, poly8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_lane_p8",
      "full name": "poly8x16_t vcopyq_lane_p8(poly8x16_t a, const int lane1, poly8x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_lane_p16",
      "full name": "poly16x4_t vcopy_lane_p16(poly16x4_t a, const int lane1, poly16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_lane_p16",
      "full name": "poly16x8_t vcopyq_lane_p16(poly16x8_t a, const int lane1, poly16x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopy_laneq_s8",
      "full name": "int8x8_t vcopy_laneq_s8(int8x8_t a, const int lane1, int8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_laneq_s8",
      "full name": "int8x16_t vcopyq_laneq_s8(int8x16_t a, const int lane1, int8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_laneq_s16",
      "full name": "int16x4_t vcopy_laneq_s16(int16x4_t a, const int lane1, int16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_laneq_s16",
      "full name": "int16x8_t vcopyq_laneq_s16(int16x8_t a, const int lane1, int16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopy_laneq_s32",
      "full name": "int32x2_t vcopy_laneq_s32(int32x2_t a, const int lane1, int32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_laneq_s32",
      "full name": "int32x4_t vcopyq_laneq_s32(int32x4_t a, const int lane1, int32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_laneq_s64",
      "full name": "int64x1_t vcopy_laneq_s64(int64x1_t a, const int lane1, int64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_laneq_s64",
      "full name": "int64x2_t vcopyq_laneq_s64(int64x2_t a, const int lane1, int64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_laneq_u8",
      "full name": "uint8x8_t vcopy_laneq_u8(uint8x8_t a, const int lane1, uint8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_laneq_u8",
      "full name": "uint8x16_t vcopyq_laneq_u8(uint8x16_t a, const int lane1, uint8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_laneq_u16",
      "full name": "uint16x4_t vcopy_laneq_u16(uint16x4_t a, const int lane1, uint16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_laneq_u16",
      "full name": "uint16x8_t vcopyq_laneq_u16(uint16x8_t a, const int lane1, uint16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopy_laneq_u32",
      "full name": "uint32x2_t vcopy_laneq_u32(uint32x2_t a, const int lane1, uint32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_laneq_u32",
      "full name": "uint32x4_t vcopyq_laneq_u32(uint32x4_t a, const int lane1, uint32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_laneq_u64",
      "full name": "uint64x1_t vcopy_laneq_u64(uint64x1_t a, const int lane1, uint64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_laneq_u64",
      "full name": "uint64x2_t vcopyq_laneq_u64(uint64x2_t a, const int lane1, uint64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_laneq_p64",
      "full name": "poly64x1_t vcopy_laneq_p64(poly64x1_t a, const int lane1, poly64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_laneq_p64",
      "full name": "poly64x2_t vcopyq_laneq_p64(poly64x2_t a, const int lane1, poly64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_laneq_f32",
      "full name": "float32x2_t vcopy_laneq_f32(float32x2_t a, const int lane1, float32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopyq_laneq_f32",
      "full name": "float32x4_t vcopyq_laneq_f32(float32x4_t a, const int lane1, float32x4_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 向量拷贝 [32]"
    },
    {
      "name": "vcopy_laneq_f64",
      "full name": "float64x1_t vcopy_laneq_f64(float64x1_t a, const int lane1, float64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopyq_laneq_f64",
      "full name": "float64x2_t vcopyq_laneq_f64(float64x2_t a, const int lane1, float64x2_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 向量拷贝 [64]"
    },
    {
      "name": "vcopy_laneq_p8",
      "full name": "poly8x8_t vcopy_laneq_p8(poly8x8_t a, const int lane1, poly8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopyq_laneq_p8",
      "full name": "poly8x16_t vcopyq_laneq_p8(poly8x16_t a, const int lane1, poly8x16_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 向量拷贝 [8]"
    },
    {
      "name": "vcopy_laneq_p16",
      "full name": "poly16x4_t vcopy_laneq_p16(poly16x4_t a, const int lane1, poly16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vcopyq_laneq_p16",
      "full name": "poly16x8_t vcopyq_laneq_p16(poly16x8_t a, const int lane1, poly16x8_t b, const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 向量拷贝 [16]"
    },
    {
      "name": "vrbit_s8",
      "full name": "int8x8_t vrbit_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vrbitq_s8",
      "full name": "int8x16_t vrbitq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vrbit_u8",
      "full name": "uint8x8_t vrbit_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vrbitq_u8",
      "full name": "uint8x16_t vrbitq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vrbit_p8",
      "full name": "poly8x8_t vrbit_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vrbitq_p8",
      "full name": "poly8x16_t vrbitq_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rbit",
      "function_en": "[vector] rbit [8]",
      "function_cn": "[向量] 反转元素的位 [8]"
    },
    {
      "name": "vcreate_s8",
      "full name": "int8x8_t vcreate_s8(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [8]"
    },
    {
      "name": "vcreate_s16",
      "full name": "int16x4_t vcreate_s16(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [16]"
    },
    {
      "name": "vcreate_s32",
      "full name": "int32x2_t vcreate_s32(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [32]"
    },
    {
      "name": "vcreate_s64",
      "full name": "int64x1_t vcreate_s64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [64]"
    },
    {
      "name": "vcreate_u8",
      "full name": "uint8x8_t vcreate_u8(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [8]"
    },
    {
      "name": "vcreate_u16",
      "full name": "uint16x4_t vcreate_u16(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [16]"
    },
    {
      "name": "vcreate_u32",
      "full name": "uint32x2_t vcreate_u32(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [32]"
    },
    {
      "name": "vcreate_u64",
      "full name": "uint64x1_t vcreate_u64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [64]"
    },
    {
      "name": "vcreate_p64",
      "full name": "poly64x1_t vcreate_p64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [64]"
    },
    {
      "name": "vcreate_f16",
      "full name": "float16x4_t vcreate_f16(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [16]"
    },
    {
      "name": "vcreate_f32",
      "full name": "float32x2_t vcreate_f32(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [32]"
    },
    {
      "name": "vcreate_p8",
      "full name": "poly8x8_t vcreate_p8(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [8]"
    },
    {
      "name": "vcreate_p16",
      "full name": "poly16x4_t vcreate_p16(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [16]"
    },
    {
      "name": "vcreate_f64",
      "full name": "float64x1_t vcreate_f64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 标量拷贝到向量寄存器 [64]"
    },
    {
      "name": "vdup_n_s8",
      "full name": "int8x8_t vdup_n_s8(int8_t value)",
      "Intel name": "_mm_set1_pi8",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_n_s8",
      "full name": "int8x16_t vdupq_n_s8(int8_t value)",
      "Intel name": "_mm_set1_epi8",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdup_n_s16",
      "full name": "int16x4_t vdup_n_s16(int16_t value)",
      "Intel name": "_mm_set1_pi16",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_n_s16",
      "full name": "int16x8_t vdupq_n_s16(int16_t value)",
      "Intel name": "_mm_set1_epi16",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdup_n_s32",
      "full name": "int32x2_t vdup_n_s32(int32_t value)",
      "Intel name": "_mm_set1_pi32",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_n_s32",
      "full name": "int32x4_t vdupq_n_s32(int32_t value)",
      "Intel name": "_mm_set1_epi32",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdup_n_s64",
      "full name": "int64x1_t vdup_n_s64(int64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_n_s64",
      "full name": "int64x2_t vdupq_n_s64(int64_t value)",
      "Intel name": "_mm_set1_epi64",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdup_n_u8",
      "full name": "uint8x8_t vdup_n_u8(uint8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_n_u8",
      "full name": "uint8x16_t vdupq_n_u8(uint8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdup_n_u16",
      "full name": "uint16x4_t vdup_n_u16(uint16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_n_u16",
      "full name": "uint16x8_t vdupq_n_u16(uint16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdup_n_u32",
      "full name": "uint32x2_t vdup_n_u32(uint32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_n_u32",
      "full name": "uint32x4_t vdupq_n_u32(uint32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdup_n_u64",
      "full name": "uint64x1_t vdup_n_u64(uint64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_n_u64",
      "full name": "uint64x2_t vdupq_n_u64(uint64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdup_n_p64",
      "full name": "poly64x1_t vdup_n_p64(poly64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_n_p64",
      "full name": "poly64x2_t vdupq_n_p64(poly64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdup_n_f32",
      "full name": "float32x2_t vdup_n_f32(float32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_n_f32",
      "full name": "float32x4_t vdupq_n_f32(float32_t value)",
      "Intel name": "_mm_set1_ps",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vdup_n_p8",
      "full name": "poly8x8_t vdup_n_p8(poly8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_n_p8",
      "full name": "poly8x16_t vdupq_n_p8(poly8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vdup_n_p16",
      "full name": "poly16x4_t vdup_n_p16(poly16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_n_p16",
      "full name": "poly16x8_t vdupq_n_p16(poly16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vdup_n_f64",
      "full name": "float64x1_t vdup_n_f64(float64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_n_f64",
      "full name": "float64x2_t vdupq_n_f64(float64_t value)",
      "Intel name": "_mm_set1_pd",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmov_n_s8",
      "full name": "int8x8_t vmov_n_s8(int8_t value)",
      "Intel name": "_mm_set1_pi8",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmovq_n_s8",
      "full name": "int8x16_t vmovq_n_s8(int8_t value)",
      "Intel name": "_mm_set1_epi8",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmov_n_s16",
      "full name": "int16x4_t vmov_n_s16(int16_t value)",
      "Intel name": "_mm_set1_pi16",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmovq_n_s16",
      "full name": "int16x8_t vmovq_n_s16(int16_t value)",
      "Intel name": "_mm_set1_epi16",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmov_n_s32",
      "full name": "int32x2_t vmov_n_s32(int32_t value)",
      "Intel name": "_mm_set1_pi32",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmovq_n_s32",
      "full name": "int32x4_t vmovq_n_s32(int32_t value)",
      "Intel name": "_mm_set1_epi32",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmov_n_s64",
      "full name": "int64x1_t vmov_n_s64(int64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmovq_n_s64",
      "full name": "int64x2_t vmovq_n_s64(int64_t value)",
      "Intel name": "_mm_set1_epi64",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmov_n_u8",
      "full name": "uint8x8_t vmov_n_u8(uint8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmovq_n_u8",
      "full name": "uint8x16_t vmovq_n_u8(uint8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmov_n_u16",
      "full name": "uint16x4_t vmov_n_u16(uint16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmovq_n_u16",
      "full name": "uint16x8_t vmovq_n_u16(uint16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmov_n_u32",
      "full name": "uint32x2_t vmov_n_u32(uint32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmovq_n_u32",
      "full name": "uint32x4_t vmovq_n_u32(uint32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmov_n_u64",
      "full name": "uint64x1_t vmov_n_u64(uint64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmovq_n_u64",
      "full name": "uint64x2_t vmovq_n_u64(uint64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmov_n_f32",
      "full name": "float32x2_t vmov_n_f32(float32_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmovq_n_f32",
      "full name": "float32x4_t vmovq_n_f32(float32_t value)",
      "Intel name": "_mm_set1_ps",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [32]"
    },
    {
      "name": "vmov_n_p8",
      "full name": "poly8x8_t vmov_n_p8(poly8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmovq_n_p8",
      "full name": "poly8x16_t vmovq_n_p8(poly8_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [8]"
    },
    {
      "name": "vmov_n_p16",
      "full name": "poly16x4_t vmov_n_p16(poly16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmovq_n_p16",
      "full name": "poly16x8_t vmovq_n_p16(poly16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [16]"
    },
    {
      "name": "vmov_n_f64",
      "full name": "float64x1_t vmov_n_f64(float64_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vmovq_n_f64",
      "full name": "float64x2_t vmovq_n_f64(float64_t value)",
      "Intel name": "_mm_set1_pd",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制一个标量值到向量中的每个元素 [64]"
    },
    {
      "name": "vdup_lane_s8",
      "full name": "int8x8_t vdup_lane_s8(int8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_lane_s8",
      "full name": "int8x16_t vdupq_lane_s8(int8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_lane_s16",
      "full name": "int16x4_t vdup_lane_s16(int16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_lane_s16",
      "full name": "int16x8_t vdupq_lane_s16(int16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_lane_s32",
      "full name": "int32x2_t vdup_lane_s32(int32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_lane_s32",
      "full name": "int32x4_t vdupq_lane_s32(int32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_lane_s64",
      "full name": "int64x1_t vdup_lane_s64(int64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_lane_s64",
      "full name": "int64x2_t vdupq_lane_s64(int64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_lane_u8",
      "full name": "uint8x8_t vdup_lane_u8(uint8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_lane_u8",
      "full name": "uint8x16_t vdupq_lane_u8(uint8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_lane_u16",
      "full name": "uint16x4_t vdup_lane_u16(uint16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_lane_u16",
      "full name": "uint16x8_t vdupq_lane_u16(uint16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_lane_u32",
      "full name": "uint32x2_t vdup_lane_u32(uint32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_lane_u32",
      "full name": "uint32x4_t vdupq_lane_u32(uint32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_lane_u64",
      "full name": "uint64x1_t vdup_lane_u64(uint64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_lane_u64",
      "full name": "uint64x2_t vdupq_lane_u64(uint64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_lane_p64",
      "full name": "poly64x1_t vdup_lane_p64(poly64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_lane_p64",
      "full name": "poly64x2_t vdupq_lane_p64(poly64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_lane_f32",
      "full name": "float32x2_t vdup_lane_f32(float32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_lane_f32",
      "full name": "float32x4_t vdupq_lane_f32(float32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_lane_p8",
      "full name": "poly8x8_t vdup_lane_p8(poly8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_lane_p8",
      "full name": "poly8x16_t vdupq_lane_p8(poly8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_lane_p16",
      "full name": "poly16x4_t vdup_lane_p16(poly16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_lane_p16",
      "full name": "poly16x8_t vdupq_lane_p16(poly16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_lane_f64",
      "full name": "float64x1_t vdup_lane_f64(float64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_lane_f64",
      "full name": "float64x2_t vdupq_lane_f64(float64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_laneq_s8",
      "full name": "int8x8_t vdup_laneq_s8(int8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_laneq_s8",
      "full name": "int8x16_t vdupq_laneq_s8(int8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_laneq_s16",
      "full name": "int16x4_t vdup_laneq_s16(int16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_laneq_s16",
      "full name": "int16x8_t vdupq_laneq_s16(int16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_laneq_s32",
      "full name": "int32x2_t vdup_laneq_s32(int32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_laneq_s32",
      "full name": "int32x4_t vdupq_laneq_s32(int32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_laneq_s64",
      "full name": "int64x1_t vdup_laneq_s64(int64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_laneq_s64",
      "full name": "int64x2_t vdupq_laneq_s64(int64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_laneq_u8",
      "full name": "uint8x8_t vdup_laneq_u8(uint8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_laneq_u8",
      "full name": "uint8x16_t vdupq_laneq_u8(uint8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_laneq_u16",
      "full name": "uint16x4_t vdup_laneq_u16(uint16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_laneq_u16",
      "full name": "uint16x8_t vdupq_laneq_u16(uint16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_laneq_u32",
      "full name": "uint32x2_t vdup_laneq_u32(uint32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_laneq_u32",
      "full name": "uint32x4_t vdupq_laneq_u32(uint32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_laneq_u64",
      "full name": "uint64x1_t vdup_laneq_u64(uint64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_laneq_u64",
      "full name": "uint64x2_t vdupq_laneq_u64(uint64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_laneq_p64",
      "full name": "poly64x1_t vdup_laneq_p64(poly64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_laneq_p64",
      "full name": "poly64x2_t vdupq_laneq_p64(poly64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdup_laneq_f32",
      "full name": "float32x2_t vdup_laneq_f32(float32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdupq_laneq_f32",
      "full name": "float32x4_t vdupq_laneq_f32(float32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [32]"
    },
    {
      "name": "vdup_laneq_p8",
      "full name": "poly8x8_t vdup_laneq_p8(poly8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdupq_laneq_p8",
      "full name": "poly8x16_t vdupq_laneq_p8(poly8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [8]"
    },
    {
      "name": "vdup_laneq_p16",
      "full name": "poly16x4_t vdup_laneq_p16(poly16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdupq_laneq_p16",
      "full name": "poly16x8_t vdupq_laneq_p16(poly16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [16]"
    },
    {
      "name": "vdup_laneq_f64",
      "full name": "float64x1_t vdup_laneq_f64(float64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vdupq_laneq_f64",
      "full name": "float64x2_t vdupq_laneq_f64(float64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制指定的向量元素到目标向量中的每个元素 [64]"
    },
    {
      "name": "vcombine_s8",
      "full name": "int8x16_t vcombine_s8(int8x8_t low, int8x8_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [8]",
      "function_cn": "[向量] 向量元素组合 [8]"
    },
    {
      "name": "vcombine_s16",
      "full name": "int16x8_t vcombine_s16(int16x4_t low, int16x4_t high)",
      "Intel name": "_mm_packs_epi16",
      "Intel Asm": "packsswb",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [16]",
      "function_cn": "[向量] 向量元素组合 [16]"
    },
    {
      "name": "vcombine_s32",
      "full name": "int32x4_t vcombine_s32(int32x2_t low, int32x2_t high)",
      "Intel name": "_mm_packs_epi32",
      "Intel Asm": "packssdw",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [32]",
      "function_cn": "[向量] 向量元素组合 [32]"
    },
    {
      "name": "vcombine_s64",
      "full name": "int64x2_t vcombine_s64(int64x1_t low, int64x1_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [64]",
      "function_cn": "[向量] 向量元素组合 [64]"
    },
    {
      "name": "vcombine_u8",
      "full name": "uint8x16_t vcombine_u8(uint8x8_t low, uint8x8_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [8]",
      "function_cn": "[向量] 向量元素组合 [8]"
    },
    {
      "name": "vcombine_u16",
      "full name": "uint16x8_t vcombine_u16(uint16x4_t low, uint16x4_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [16]",
      "function_cn": "[向量] 向量元素组合 [16]"
    },
    {
      "name": "vcombine_u32",
      "full name": "uint32x4_t vcombine_u32(uint32x2_t low, uint32x2_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [32]",
      "function_cn": "[向量] 向量元素组合 [32]"
    },
    {
      "name": "vcombine_u64",
      "full name": "uint64x2_t vcombine_u64(uint64x1_t low, uint64x1_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [64]",
      "function_cn": "[向量] 向量元素组合 [64]"
    },
    {
      "name": "vcombine_p64",
      "full name": "poly64x2_t vcombine_p64(poly64x1_t low, poly64x1_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [64]",
      "function_cn": "[向量] 向量元素组合 [64]"
    },
    {
      "name": "vcombine_f16",
      "full name": "float16x8_t vcombine_f16(float16x4_t low, float16x4_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [16]",
      "function_cn": "[向量] 向量元素组合 [16]"
    },
    {
      "name": "vcombine_f32",
      "full name": "float32x4_t vcombine_f32(float32x2_t low, float32x2_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [32]",
      "function_cn": "[向量] 向量元素组合 [32]"
    },
    {
      "name": "vcombine_p8",
      "full name": "poly8x16_t vcombine_p8(poly8x8_t low, poly8x8_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [8]",
      "function_cn": "[向量] 向量元素组合 [8]"
    },
    {
      "name": "vcombine_p16",
      "full name": "poly16x8_t vcombine_p16(poly16x4_t low, poly16x4_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [16]",
      "function_cn": "[向量] 向量元素组合 [16]"
    },
    {
      "name": "vcombine_f64",
      "full name": "float64x2_t vcombine_f64(float64x1_t low, float64x1_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup ins",
      "function_en": "[vector] dup ins [64]",
      "function_cn": "[向量] 向量元素组合 [64]"
    },
    {
      "name": "vget_high_s8",
      "full name": "int8x8_t vget_high_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的高半部分元素 [8]"
    },
    {
      "name": "vget_high_s16",
      "full name": "int16x4_t vget_high_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的高半部分元素 [16]"
    },
    {
      "name": "vget_high_s32",
      "full name": "int32x2_t vget_high_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的高半部分元素 [32]"
    },
    {
      "name": "vget_high_s64",
      "full name": "int64x1_t vget_high_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的高半部分元素 [64]"
    },
    {
      "name": "vget_high_u8",
      "full name": "uint8x8_t vget_high_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的高半部分元素 [8]"
    },
    {
      "name": "vget_high_u16",
      "full name": "uint16x4_t vget_high_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的高半部分元素 [16]"
    },
    {
      "name": "vget_high_u32",
      "full name": "uint32x2_t vget_high_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的高半部分元素 [32]"
    },
    {
      "name": "vget_high_u64",
      "full name": "uint64x1_t vget_high_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的高半部分元素 [64]"
    },
    {
      "name": "vget_high_p64",
      "full name": "poly64x1_t vget_high_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的高半部分元素 [64]"
    },
    {
      "name": "vget_high_f16",
      "full name": "float16x4_t vget_high_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的高半部分元素 [16]"
    },
    {
      "name": "vget_high_f32",
      "full name": "float32x2_t vget_high_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的高半部分元素 [32]"
    },
    {
      "name": "vget_high_p8",
      "full name": "poly8x8_t vget_high_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的高半部分元素 [8]"
    },
    {
      "name": "vget_high_p16",
      "full name": "poly16x4_t vget_high_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的高半部分元素 [16]"
    },
    {
      "name": "vget_high_f64",
      "full name": "float64x1_t vget_high_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的高半部分元素 [64]"
    },
    {
      "name": "vget_low_s8",
      "full name": "int8x8_t vget_low_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的低半部分元素 [8]"
    },
    {
      "name": "vget_low_s16",
      "full name": "int16x4_t vget_low_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的低半部分元素 [16]"
    },
    {
      "name": "vget_low_s32",
      "full name": "int32x2_t vget_low_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的低半部分元素 [32]"
    },
    {
      "name": "vget_low_s64",
      "full name": "int64x1_t vget_low_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的低半部分元素 [64]"
    },
    {
      "name": "vget_low_u8",
      "full name": "uint8x8_t vget_low_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的低半部分元素 [8]"
    },
    {
      "name": "vget_low_u16",
      "full name": "uint16x4_t vget_low_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的低半部分元素 [16]"
    },
    {
      "name": "vget_low_u32",
      "full name": "uint32x2_t vget_low_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的低半部分元素 [32]"
    },
    {
      "name": "vget_low_u64",
      "full name": "uint64x1_t vget_low_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的低半部分元素 [64]"
    },
    {
      "name": "vget_low_p64",
      "full name": "poly64x1_t vget_low_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的低半部分元素 [64]"
    },
    {
      "name": "vget_low_f16",
      "full name": "float16x4_t vget_low_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的低半部分元素 [16]"
    },
    {
      "name": "vget_low_f32",
      "full name": "float32x2_t vget_low_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 获取向量的低半部分元素 [32]"
    },
    {
      "name": "vget_low_p8",
      "full name": "poly8x8_t vget_low_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 获取向量的低半部分元素 [8]"
    },
    {
      "name": "vget_low_p16",
      "full name": "poly16x4_t vget_low_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 获取向量的低半部分元素 [16]"
    },
    {
      "name": "vget_low_f64",
      "full name": "float64x1_t vget_low_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 获取向量的低半部分元素 [64]"
    },
    {
      "name": "vdupb_lane_s8",
      "full name": "int8_t vdupb_lane_s8(int8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_lane_s16",
      "full name": "int16_t vduph_lane_s16(int16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vdups_lane_s32",
      "full name": "int32_t vdups_lane_s32(int32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_lane_s64",
      "full name": "int64_t vdupd_lane_s64(int64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdupb_lane_u8",
      "full name": "uint8_t vdupb_lane_u8(uint8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_lane_u16",
      "full name": "uint16_t vduph_lane_u16(uint16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vdups_lane_u32",
      "full name": "uint32_t vdups_lane_u32(uint32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_lane_u64",
      "full name": "uint64_t vdupd_lane_u64(uint64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdups_lane_f32",
      "full name": "float32_t vdups_lane_f32(float32x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_lane_f64",
      "full name": "float64_t vdupd_lane_f64(float64x1_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdupb_lane_p8",
      "full name": "poly8_t vdupb_lane_p8(poly8x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_lane_p16",
      "full name": "poly16_t vduph_lane_p16(poly16x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vdupb_laneq_s8",
      "full name": "int8_t vdupb_laneq_s8(int8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_laneq_s16",
      "full name": "int16_t vduph_laneq_s16(int16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vdups_laneq_s32",
      "full name": "int32_t vdups_laneq_s32(int32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_laneq_s64",
      "full name": "int64_t vdupd_laneq_s64(int64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdupb_laneq_u8",
      "full name": "uint8_t vdupb_laneq_u8(uint8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_laneq_u16",
      "full name": "uint16_t vduph_laneq_u16(uint16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vdups_laneq_u32",
      "full name": "uint32_t vdups_laneq_u32(uint32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_laneq_u64",
      "full name": "uint64_t vdupd_laneq_u64(uint64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdups_laneq_f32",
      "full name": "float32_t vdups_laneq_f32(float32x4_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [32]"
    },
    {
      "name": "vdupd_laneq_f64",
      "full name": "float64_t vdupd_laneq_f64(float64x2_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [64]"
    },
    {
      "name": "vdupb_laneq_p8",
      "full name": "poly8_t vdupb_laneq_p8(poly8x16_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [8]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [8]"
    },
    {
      "name": "vduph_laneq_p16",
      "full name": "poly16_t vduph_laneq_p16(poly16x8_t vec, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 复制向量中指定的一个元素到目标寄存器 [16]"
    },
    {
      "name": "vld1_s8",
      "full name": "int8x8_t vld1_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1q_s8",
      "full name": "int8x16_t vld1q_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1_s16",
      "full name": "int16x4_t vld1_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1q_s16",
      "full name": "int16x8_t vld1q_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1_s32",
      "full name": "int32x2_t vld1_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1q_s32",
      "full name": "int32x4_t vld1q_s32(int32_t const * ptr)",
      "Intel name": "_mm_load_epi32",
      "Intel Asm": "vmovdqa32",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1_s64",
      "full name": "int64x1_t vld1_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1q_s64",
      "full name": "int64x2_t vld1q_s64(int64_t const * ptr)",
      "Intel name": "_mm_load_epi64",
      "Intel Asm": "vmovdqa64",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1_u8",
      "full name": "uint8x8_t vld1_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1q_u8",
      "full name": "uint8x16_t vld1q_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1_u16",
      "full name": "uint16x4_t vld1_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1q_u16",
      "full name": "uint16x8_t vld1q_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1_u32",
      "full name": "uint32x2_t vld1_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1q_u32",
      "full name": "uint32x4_t vld1q_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1_u64",
      "full name": "uint64x1_t vld1_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1q_u64",
      "full name": "uint64x2_t vld1q_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1_p64",
      "full name": "poly64x1_t vld1_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1q_p64",
      "full name": "poly64x2_t vld1q_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1_f16",
      "full name": "float16x4_t vld1_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1q_f16",
      "full name": "float16x8_t vld1q_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1_f32",
      "full name": "float32x2_t vld1_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1q_f32",
      "full name": "float32x4_t vld1q_f32(float32_t const * ptr)",
      "Intel name": "_mm_load_ps",
      "Intel Asm": "movaps",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [32]"
    },
    {
      "name": "vld1_p8",
      "full name": "poly8x8_t vld1_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1q_p8",
      "full name": "poly8x16_t vld1q_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [8]"
    },
    {
      "name": "vld1_p16",
      "full name": "poly16x4_t vld1_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1q_p16",
      "full name": "poly16x8_t vld1q_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [16]"
    },
    {
      "name": "vld1_f64",
      "full name": "float64x1_t vld1_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1q_f64",
      "full name": "float64x2_t vld1q_f64(float64_t const * ptr)",
      "Intel name": "_mm_load_pd",
      "Intel Asm": "movapd",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入向量寄存器 [64]"
    },
    {
      "name": "vld1_lane_s8",
      "full name": "int8x8_t vld1_lane_s8(int8_t const * ptr, int8x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1q_lane_s8",
      "full name": "int8x16_t vld1q_lane_s8(int8_t const * ptr, int8x16_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1_lane_s16",
      "full name": "int16x4_t vld1_lane_s16(int16_t const * ptr, int16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1q_lane_s16",
      "full name": "int16x8_t vld1q_lane_s16(int16_t const * ptr, int16x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1_lane_s32",
      "full name": "int32x2_t vld1_lane_s32(int32_t const * ptr, int32x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1q_lane_s32",
      "full name": "int32x4_t vld1q_lane_s32(int32_t const * ptr, int32x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1_lane_s64",
      "full name": "int64x1_t vld1_lane_s64(int64_t const * ptr, int64x1_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1q_lane_s64",
      "full name": "int64x2_t vld1q_lane_s64(int64_t const * ptr, int64x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1_lane_u8",
      "full name": "uint8x8_t vld1_lane_u8(uint8_t const * ptr, uint8x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1q_lane_u8",
      "full name": "uint8x16_t vld1q_lane_u8(uint8_t const * ptr, uint8x16_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1_lane_u16",
      "full name": "uint16x4_t vld1_lane_u16(uint16_t const * ptr, uint16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1q_lane_u16",
      "full name": "uint16x8_t vld1q_lane_u16(uint16_t const * ptr, uint16x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1_lane_u32",
      "full name": "uint32x2_t vld1_lane_u32(uint32_t const * ptr, uint32x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1q_lane_u32",
      "full name": "uint32x4_t vld1q_lane_u32(uint32_t const * ptr, uint32x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1_lane_u64",
      "full name": "uint64x1_t vld1_lane_u64(uint64_t const * ptr, uint64x1_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1q_lane_u64",
      "full name": "uint64x2_t vld1q_lane_u64(uint64_t const * ptr, uint64x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1_lane_p64",
      "full name": "poly64x1_t vld1_lane_p64(poly64_t const * ptr, poly64x1_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1q_lane_p64",
      "full name": "poly64x2_t vld1q_lane_p64(poly64_t const * ptr, poly64x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1_lane_f16",
      "full name": "float16x4_t vld1_lane_f16(float16_t const * ptr, float16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1q_lane_f16",
      "full name": "float16x8_t vld1q_lane_f16(float16_t const * ptr, float16x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1_lane_f32",
      "full name": "float32x2_t vld1_lane_f32(float32_t const * ptr, float32x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1q_lane_f32",
      "full name": "float32x4_t vld1q_lane_f32(float32_t const * ptr, float32x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [32]"
    },
    {
      "name": "vld1_lane_p8",
      "full name": "poly8x8_t vld1_lane_p8(poly8_t const * ptr, poly8x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1q_lane_p8",
      "full name": "poly8x16_t vld1q_lane_p8(poly8_t const * ptr, poly8x16_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [8]"
    },
    {
      "name": "vld1_lane_p16",
      "full name": "poly16x4_t vld1_lane_p16(poly16_t const * ptr, poly16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1q_lane_p16",
      "full name": "poly16x8_t vld1q_lane_p16(poly16_t const * ptr, poly16x8_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [16]"
    },
    {
      "name": "vld1_lane_f64",
      "full name": "float64x1_t vld1_lane_f64(float64_t const * ptr, float64x1_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1q_lane_f64",
      "full name": "float64x2_t vld1q_lane_f64(float64_t const * ptr, float64x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将数组中的数据写入指定的向量元素 [64]"
    },
    {
      "name": "vld1_dup_s8",
      "full name": "int8x8_t vld1_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1q_dup_s8",
      "full name": "int8x16_t vld1q_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1_dup_s16",
      "full name": "int16x4_t vld1_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1q_dup_s16",
      "full name": "int16x8_t vld1q_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1_dup_s32",
      "full name": "int32x2_t vld1_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1q_dup_s32",
      "full name": "int32x4_t vld1q_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1_dup_s64",
      "full name": "int64x1_t vld1_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1q_dup_s64",
      "full name": "int64x2_t vld1q_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1_dup_u8",
      "full name": "uint8x8_t vld1_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1q_dup_u8",
      "full name": "uint8x16_t vld1q_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1_dup_u16",
      "full name": "uint16x4_t vld1_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1q_dup_u16",
      "full name": "uint16x8_t vld1q_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1_dup_u32",
      "full name": "uint32x2_t vld1_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1q_dup_u32",
      "full name": "uint32x4_t vld1q_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1_dup_u64",
      "full name": "uint64x1_t vld1_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1q_dup_u64",
      "full name": "uint64x2_t vld1q_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1_dup_p64",
      "full name": "poly64x1_t vld1_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1q_dup_p64",
      "full name": "poly64x2_t vld1q_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1_dup_f16",
      "full name": "float16x4_t vld1_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1q_dup_f16",
      "full name": "float16x8_t vld1q_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1_dup_f32",
      "full name": "float32x2_t vld1_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1q_dup_f32",
      "full name": "float32x4_t vld1q_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [32]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [32]"
    },
    {
      "name": "vld1_dup_p8",
      "full name": "poly8x8_t vld1_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1q_dup_p8",
      "full name": "poly8x16_t vld1q_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [8]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [8]"
    },
    {
      "name": "vld1_dup_p16",
      "full name": "poly16x4_t vld1_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1q_dup_p16",
      "full name": "poly16x8_t vld1q_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [16]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [16]"
    },
    {
      "name": "vld1_dup_f64",
      "full name": "float64x1_t vld1_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vld1q_dup_f64",
      "full name": "float64x2_t vld1q_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1r",
      "function_en": "[vector] ld1r [64]",
      "function_cn": "[向量] 加载指针中的数据并复制到目标向量寄存器 [64]"
    },
    {
      "name": "vst1_s8",
      "full name": "void vst1_s8(int8_t * ptr, int8x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_s8",
      "full name": "void vst1q_s8(int8_t * ptr, int8x16_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_s16",
      "full name": "void vst1_s16(int16_t * ptr, int16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_s16",
      "full name": "void vst1q_s16(int16_t * ptr, int16x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_s32",
      "full name": "void vst1_s32(int32_t * ptr, int32x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_s32",
      "full name": "void vst1q_s32(int32_t * ptr, int32x4_t val)",
      "Intel name": "_mm_store_epi32",
      "Intel Asm": "vmovdqa32",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_s64",
      "full name": "void vst1_s64(int64_t * ptr, int64x1_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_s64",
      "full name": "void vst1q_s64(int64_t * ptr, int64x2_t val)",
      "Intel name": "_mm_store_epi64",
      "Intel Asm": "vmovdqa64",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_u8",
      "full name": "void vst1_u8(uint8_t * ptr, uint8x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_u8",
      "full name": "void vst1q_u8(uint8_t * ptr, uint8x16_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_u16",
      "full name": "void vst1_u16(uint16_t * ptr, uint16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_u16",
      "full name": "void vst1q_u16(uint16_t * ptr, uint16x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_u32",
      "full name": "void vst1_u32(uint32_t * ptr, uint32x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_u32",
      "full name": "void vst1q_u32(uint32_t * ptr, uint32x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_u64",
      "full name": "void vst1_u64(uint64_t * ptr, uint64x1_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_u64",
      "full name": "void vst1q_u64(uint64_t * ptr, uint64x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_p64",
      "full name": "void vst1_p64(poly64_t * ptr, poly64x1_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_p64",
      "full name": "void vst1q_p64(poly64_t * ptr, poly64x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_f16",
      "full name": "void vst1_f16(float16_t * ptr, float16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_f16",
      "full name": "void vst1q_f16(float16_t * ptr, float16x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_f32",
      "full name": "void vst1_f32(float32_t * ptr, float32x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_f32",
      "full name": "void vst1q_f32(float32_t * ptr, float32x4_t val)",
      "Intel name": "_mm_store_ps",
      "Intel Asm": "movaps",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_p8",
      "full name": "void vst1_p8(poly8_t * ptr, poly8x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_p8",
      "full name": "void vst1q_p8(poly8_t * ptr, poly8x16_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_p16",
      "full name": "void vst1_p16(poly16_t * ptr, poly16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_p16",
      "full name": "void vst1q_p16(poly16_t * ptr, poly16x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_f64",
      "full name": "void vst1_f64(float64_t * ptr, float64x1_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_f64",
      "full name": "void vst1q_f64(float64_t * ptr, float64x2_t val)",
      "Intel name": "_mm_store_pd",
      "Intel Asm": "movapd",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的数据写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_lane_s8",
      "full name": "void vst1_lane_s8(int8_t * ptr, int8x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_lane_s8",
      "full name": "void vst1q_lane_s8(int8_t * ptr, int8x16_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_lane_s16",
      "full name": "void vst1_lane_s16(int16_t * ptr, int16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_lane_s16",
      "full name": "void vst1q_lane_s16(int16_t * ptr, int16x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_lane_s32",
      "full name": "void vst1_lane_s32(int32_t * ptr, int32x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_lane_s32",
      "full name": "void vst1q_lane_s32(int32_t * ptr, int32x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_lane_s64",
      "full name": "void vst1_lane_s64(int64_t * ptr, int64x1_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_lane_s64",
      "full name": "void vst1q_lane_s64(int64_t * ptr, int64x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_lane_u8",
      "full name": "void vst1_lane_u8(uint8_t * ptr, uint8x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_lane_u8",
      "full name": "void vst1q_lane_u8(uint8_t * ptr, uint8x16_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_lane_u16",
      "full name": "void vst1_lane_u16(uint16_t * ptr, uint16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_lane_u16",
      "full name": "void vst1q_lane_u16(uint16_t * ptr, uint16x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_lane_u32",
      "full name": "void vst1_lane_u32(uint32_t * ptr, uint32x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_lane_u32",
      "full name": "void vst1q_lane_u32(uint32_t * ptr, uint32x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_lane_u64",
      "full name": "void vst1_lane_u64(uint64_t * ptr, uint64x1_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_lane_u64",
      "full name": "void vst1q_lane_u64(uint64_t * ptr, uint64x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_lane_p64",
      "full name": "void vst1_lane_p64(poly64_t * ptr, poly64x1_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_lane_p64",
      "full name": "void vst1q_lane_p64(poly64_t * ptr, poly64x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1_lane_f16",
      "full name": "void vst1_lane_f16(float16_t * ptr, float16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_lane_f16",
      "full name": "void vst1q_lane_f16(float16_t * ptr, float16x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_lane_f32",
      "full name": "void vst1_lane_f32(float32_t * ptr, float32x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1q_lane_f32",
      "full name": "void vst1q_lane_f32(float32_t * ptr, float32x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [32]"
    },
    {
      "name": "vst1_lane_p8",
      "full name": "void vst1_lane_p8(poly8_t * ptr, poly8x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1q_lane_p8",
      "full name": "void vst1q_lane_p8(poly8_t * ptr, poly8x16_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [8]"
    },
    {
      "name": "vst1_lane_p16",
      "full name": "void vst1_lane_p16(poly16_t * ptr, poly16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1q_lane_p16",
      "full name": "void vst1q_lane_p16(poly16_t * ptr, poly16x8_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [16]"
    },
    {
      "name": "vst1_lane_f64",
      "full name": "void vst1_lane_f64(float64_t * ptr, float64x1_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vst1q_lane_f64",
      "full name": "void vst1q_lane_f64(float64_t * ptr, float64x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量中指定元素的内容写入指针所指向的内存 [64]"
    },
    {
      "name": "vld2_s8",
      "full name": "int8x8x2_t vld2_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2q_s8",
      "full name": "int8x16x2_t vld2q_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2_s16",
      "full name": "int16x4x2_t vld2_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2q_s16",
      "full name": "int16x8x2_t vld2q_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2_s32",
      "full name": "int32x2x2_t vld2_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2q_s32",
      "full name": "int32x4x2_t vld2q_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2_u8",
      "full name": "uint8x8x2_t vld2_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2q_u8",
      "full name": "uint8x16x2_t vld2q_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2_u16",
      "full name": "uint16x4x2_t vld2_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2q_u16",
      "full name": "uint16x8x2_t vld2q_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2_u32",
      "full name": "uint32x2x2_t vld2_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2q_u32",
      "full name": "uint32x4x2_t vld2q_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2_f16",
      "full name": "float16x4x2_t vld2_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2q_f16",
      "full name": "float16x8x2_t vld2q_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2_f32",
      "full name": "float32x2x2_t vld2_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2q_f32",
      "full name": "float32x4x2_t vld2q_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [32]"
    },
    {
      "name": "vld2_p8",
      "full name": "poly8x8x2_t vld2_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2q_p8",
      "full name": "poly8x16x2_t vld2q_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [8]"
    },
    {
      "name": "vld2_p16",
      "full name": "poly16x4x2_t vld2_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2q_p16",
      "full name": "poly16x8x2_t vld2q_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [16]"
    },
    {
      "name": "vld2_s64",
      "full name": "int64x1x2_t vld2_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2_u64",
      "full name": "uint64x1x2_t vld2_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2_p64",
      "full name": "poly64x1x2_t vld2_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2q_s64",
      "full name": "int64x2x2_t vld2q_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2q_u64",
      "full name": "uint64x2x2_t vld2q_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2q_p64",
      "full name": "poly64x2x2_t vld2q_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2_f64",
      "full name": "float64x1x2_t vld2_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld2q_f64",
      "full name": "float64x2x2_t vld2q_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入2个向量寄存器 [64]"
    },
    {
      "name": "vld3_s8",
      "full name": "int8x8x3_t vld3_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3q_s8",
      "full name": "int8x16x3_t vld3q_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3_s16",
      "full name": "int16x4x3_t vld3_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3q_s16",
      "full name": "int16x8x3_t vld3q_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3_s32",
      "full name": "int32x2x3_t vld3_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3q_s32",
      "full name": "int32x4x3_t vld3q_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3_u8",
      "full name": "uint8x8x3_t vld3_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3q_u8",
      "full name": "uint8x16x3_t vld3q_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3_u16",
      "full name": "uint16x4x3_t vld3_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3q_u16",
      "full name": "uint16x8x3_t vld3q_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3_u32",
      "full name": "uint32x2x3_t vld3_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3q_u32",
      "full name": "uint32x4x3_t vld3q_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3_f16",
      "full name": "float16x4x3_t vld3_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3q_f16",
      "full name": "float16x8x3_t vld3q_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3_f32",
      "full name": "float32x2x3_t vld3_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3q_f32",
      "full name": "float32x4x3_t vld3q_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [32]"
    },
    {
      "name": "vld3_p8",
      "full name": "poly8x8x3_t vld3_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3q_p8",
      "full name": "poly8x16x3_t vld3q_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [8]"
    },
    {
      "name": "vld3_p16",
      "full name": "poly16x4x3_t vld3_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3q_p16",
      "full name": "poly16x8x3_t vld3q_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [16]"
    },
    {
      "name": "vld3_s64",
      "full name": "int64x1x3_t vld3_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3_u64",
      "full name": "uint64x1x3_t vld3_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3_p64",
      "full name": "poly64x1x3_t vld3_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3q_s64",
      "full name": "int64x2x3_t vld3q_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3q_u64",
      "full name": "uint64x2x3_t vld3q_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3q_p64",
      "full name": "poly64x2x3_t vld3q_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3_f64",
      "full name": "float64x1x3_t vld3_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld3q_f64",
      "full name": "float64x2x3_t vld3q_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入3个向量寄存器 [64]"
    },
    {
      "name": "vld4_s8",
      "full name": "int8x8x4_t vld4_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4q_s8",
      "full name": "int8x16x4_t vld4q_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4_s16",
      "full name": "int16x4x4_t vld4_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4q_s16",
      "full name": "int16x8x4_t vld4q_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4_s32",
      "full name": "int32x2x4_t vld4_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4q_s32",
      "full name": "int32x4x4_t vld4q_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4_u8",
      "full name": "uint8x8x4_t vld4_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4q_u8",
      "full name": "uint8x16x4_t vld4q_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4_u16",
      "full name": "uint16x4x4_t vld4_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4q_u16",
      "full name": "uint16x8x4_t vld4q_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4_u32",
      "full name": "uint32x2x4_t vld4_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4q_u32",
      "full name": "uint32x4x4_t vld4q_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4_f16",
      "full name": "float16x4x4_t vld4_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4q_f16",
      "full name": "float16x8x4_t vld4q_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4_f32",
      "full name": "float32x2x4_t vld4_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4q_f32",
      "full name": "float32x4x4_t vld4q_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [32]"
    },
    {
      "name": "vld4_p8",
      "full name": "poly8x8x4_t vld4_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4q_p8",
      "full name": "poly8x16x4_t vld4q_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [8]"
    },
    {
      "name": "vld4_p16",
      "full name": "poly16x4x4_t vld4_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4q_p16",
      "full name": "poly16x8x4_t vld4q_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [16]"
    },
    {
      "name": "vld4_s64",
      "full name": "int64x1x4_t vld4_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4_u64",
      "full name": "uint64x1x4_t vld4_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4_p64",
      "full name": "poly64x1x4_t vld4_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4q_s64",
      "full name": "int64x2x4_t vld4q_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4q_u64",
      "full name": "uint64x2x4_t vld4q_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4q_p64",
      "full name": "poly64x2x4_t vld4q_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4_f64",
      "full name": "float64x1x4_t vld4_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld4q_f64",
      "full name": "float64x2x4_t vld4q_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针所指向的内存中的内容写入4个向量寄存器 [64]"
    },
    {
      "name": "vld2_dup_s8",
      "full name": "int8x8x2_t vld2_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2q_dup_s8",
      "full name": "int8x16x2_t vld2q_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2_dup_s16",
      "full name": "int16x4x2_t vld2_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2q_dup_s16",
      "full name": "int16x8x2_t vld2q_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2_dup_s32",
      "full name": "int32x2x2_t vld2_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2q_dup_s32",
      "full name": "int32x4x2_t vld2q_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2_dup_u8",
      "full name": "uint8x8x2_t vld2_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2q_dup_u8",
      "full name": "uint8x16x2_t vld2q_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2_dup_u16",
      "full name": "uint16x4x2_t vld2_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2q_dup_u16",
      "full name": "uint16x8x2_t vld2q_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2_dup_u32",
      "full name": "uint32x2x2_t vld2_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2q_dup_u32",
      "full name": "uint32x4x2_t vld2q_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2_dup_f16",
      "full name": "float16x4x2_t vld2_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2q_dup_f16",
      "full name": "float16x8x2_t vld2q_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2_dup_f32",
      "full name": "float32x2x2_t vld2_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2q_dup_f32",
      "full name": "float32x4x2_t vld2q_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [32]"
    },
    {
      "name": "vld2_dup_p8",
      "full name": "poly8x8x2_t vld2_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2q_dup_p8",
      "full name": "poly8x16x2_t vld2q_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [8]"
    },
    {
      "name": "vld2_dup_p16",
      "full name": "poly16x4x2_t vld2_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2q_dup_p16",
      "full name": "poly16x8x2_t vld2q_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [16]"
    },
    {
      "name": "vld2_dup_s64",
      "full name": "int64x1x2_t vld2_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2_dup_u64",
      "full name": "uint64x1x2_t vld2_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2_dup_p64",
      "full name": "poly64x1x2_t vld2_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2q_dup_s64",
      "full name": "int64x2x2_t vld2q_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2q_dup_u64",
      "full name": "uint64x2x2_t vld2q_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2q_dup_p64",
      "full name": "poly64x2x2_t vld2q_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2_dup_f64",
      "full name": "float64x1x2_t vld2_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld2q_dup_f64",
      "full name": "float64x2x2_t vld2q_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2r",
      "function_en": "[vector] ld2r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到2个向量中的每个元素 [64]"
    },
    {
      "name": "vld3_dup_s8",
      "full name": "int8x8x3_t vld3_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3q_dup_s8",
      "full name": "int8x16x3_t vld3q_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3_dup_s16",
      "full name": "int16x4x3_t vld3_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3q_dup_s16",
      "full name": "int16x8x3_t vld3q_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3_dup_s32",
      "full name": "int32x2x3_t vld3_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3q_dup_s32",
      "full name": "int32x4x3_t vld3q_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3_dup_u8",
      "full name": "uint8x8x3_t vld3_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3q_dup_u8",
      "full name": "uint8x16x3_t vld3q_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3_dup_u16",
      "full name": "uint16x4x3_t vld3_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3q_dup_u16",
      "full name": "uint16x8x3_t vld3q_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3_dup_u32",
      "full name": "uint32x2x3_t vld3_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3q_dup_u32",
      "full name": "uint32x4x3_t vld3q_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3_dup_f16",
      "full name": "float16x4x3_t vld3_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3q_dup_f16",
      "full name": "float16x8x3_t vld3q_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3_dup_f32",
      "full name": "float32x2x3_t vld3_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3q_dup_f32",
      "full name": "float32x4x3_t vld3q_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [32]"
    },
    {
      "name": "vld3_dup_p8",
      "full name": "poly8x8x3_t vld3_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3q_dup_p8",
      "full name": "poly8x16x3_t vld3q_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [8]"
    },
    {
      "name": "vld3_dup_p16",
      "full name": "poly16x4x3_t vld3_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3q_dup_p16",
      "full name": "poly16x8x3_t vld3q_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [16]"
    },
    {
      "name": "vld3_dup_s64",
      "full name": "int64x1x3_t vld3_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3_dup_u64",
      "full name": "uint64x1x3_t vld3_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3_dup_p64",
      "full name": "poly64x1x3_t vld3_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3q_dup_s64",
      "full name": "int64x2x3_t vld3q_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3q_dup_u64",
      "full name": "uint64x2x3_t vld3q_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3q_dup_p64",
      "full name": "poly64x2x3_t vld3q_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3_dup_f64",
      "full name": "float64x1x3_t vld3_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld3q_dup_f64",
      "full name": "float64x2x3_t vld3q_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3r",
      "function_en": "[vector] ld3r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到3个向量中的每个元素 [64]"
    },
    {
      "name": "vld4_dup_s8",
      "full name": "int8x8x4_t vld4_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4q_dup_s8",
      "full name": "int8x16x4_t vld4q_dup_s8(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4_dup_s16",
      "full name": "int16x4x4_t vld4_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4q_dup_s16",
      "full name": "int16x8x4_t vld4q_dup_s16(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4_dup_s32",
      "full name": "int32x2x4_t vld4_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4q_dup_s32",
      "full name": "int32x4x4_t vld4q_dup_s32(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4_dup_u8",
      "full name": "uint8x8x4_t vld4_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4q_dup_u8",
      "full name": "uint8x16x4_t vld4q_dup_u8(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4_dup_u16",
      "full name": "uint16x4x4_t vld4_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4q_dup_u16",
      "full name": "uint16x8x4_t vld4q_dup_u16(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4_dup_u32",
      "full name": "uint32x2x4_t vld4_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4q_dup_u32",
      "full name": "uint32x4x4_t vld4q_dup_u32(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4_dup_f16",
      "full name": "float16x4x4_t vld4_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4q_dup_f16",
      "full name": "float16x8x4_t vld4q_dup_f16(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4_dup_f32",
      "full name": "float32x2x4_t vld4_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4q_dup_f32",
      "full name": "float32x4x4_t vld4q_dup_f32(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [32]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [32]"
    },
    {
      "name": "vld4_dup_p8",
      "full name": "poly8x8x4_t vld4_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4q_dup_p8",
      "full name": "poly8x16x4_t vld4q_dup_p8(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [8]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [8]"
    },
    {
      "name": "vld4_dup_p16",
      "full name": "poly16x4x4_t vld4_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4q_dup_p16",
      "full name": "poly16x8x4_t vld4q_dup_p16(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [16]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [16]"
    },
    {
      "name": "vld4_dup_s64",
      "full name": "int64x1x4_t vld4_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4_dup_u64",
      "full name": "uint64x1x4_t vld4_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4_dup_p64",
      "full name": "poly64x1x4_t vld4_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4q_dup_s64",
      "full name": "int64x2x4_t vld4q_dup_s64(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4q_dup_u64",
      "full name": "uint64x2x4_t vld4q_dup_u64(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4q_dup_p64",
      "full name": "poly64x2x4_t vld4q_dup_p64(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4_dup_f64",
      "full name": "float64x1x4_t vld4_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vld4q_dup_f64",
      "full name": "float64x2x4_t vld4q_dup_f64(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4r",
      "function_en": "[vector] ld4r [64]",
      "function_cn": "[向量] 将指针指向的内容复制到4个向量中的每个元素 [64]"
    },
    {
      "name": "vst2_s8",
      "full name": "void vst2_s8(int8_t * ptr, int8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2q_s8",
      "full name": "void vst2q_s8(int8_t * ptr, int8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2_s16",
      "full name": "void vst2_s16(int16_t * ptr, int16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2q_s16",
      "full name": "void vst2q_s16(int16_t * ptr, int16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2_s32",
      "full name": "void vst2_s32(int32_t * ptr, int32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2q_s32",
      "full name": "void vst2q_s32(int32_t * ptr, int32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2_u8",
      "full name": "void vst2_u8(uint8_t * ptr, uint8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2q_u8",
      "full name": "void vst2q_u8(uint8_t * ptr, uint8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2_u16",
      "full name": "void vst2_u16(uint16_t * ptr, uint16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2q_u16",
      "full name": "void vst2q_u16(uint16_t * ptr, uint16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2_u32",
      "full name": "void vst2_u32(uint32_t * ptr, uint32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2q_u32",
      "full name": "void vst2q_u32(uint32_t * ptr, uint32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2_f16",
      "full name": "void vst2_f16(float16_t * ptr, float16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2q_f16",
      "full name": "void vst2q_f16(float16_t * ptr, float16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2_f32",
      "full name": "void vst2_f32(float32_t * ptr, float32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2q_f32",
      "full name": "void vst2q_f32(float32_t * ptr, float32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst2_p8",
      "full name": "void vst2_p8(poly8_t * ptr, poly8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2q_p8",
      "full name": "void vst2q_p8(poly8_t * ptr, poly8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst2_p16",
      "full name": "void vst2_p16(poly16_t * ptr, poly16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2q_p16",
      "full name": "void vst2q_p16(poly16_t * ptr, poly16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst2_s64",
      "full name": "void vst2_s64(int64_t * ptr, int64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2_u64",
      "full name": "void vst2_u64(uint64_t * ptr, uint64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2_p64",
      "full name": "void vst2_p64(poly64_t * ptr, poly64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2q_s64",
      "full name": "void vst2q_s64(int64_t * ptr, int64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2q_u64",
      "full name": "void vst2q_u64(uint64_t * ptr, uint64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2q_p64",
      "full name": "void vst2q_p64(poly64_t * ptr, poly64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2_f64",
      "full name": "void vst2_f64(float64_t * ptr, float64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst2q_f64",
      "full name": "void vst2q_f64(float64_t * ptr, float64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3_s8",
      "full name": "void vst3_s8(int8_t * ptr, int8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3q_s8",
      "full name": "void vst3q_s8(int8_t * ptr, int8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3_s16",
      "full name": "void vst3_s16(int16_t * ptr, int16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3q_s16",
      "full name": "void vst3q_s16(int16_t * ptr, int16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3_s32",
      "full name": "void vst3_s32(int32_t * ptr, int32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3q_s32",
      "full name": "void vst3q_s32(int32_t * ptr, int32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3_u8",
      "full name": "void vst3_u8(uint8_t * ptr, uint8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3q_u8",
      "full name": "void vst3q_u8(uint8_t * ptr, uint8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3_u16",
      "full name": "void vst3_u16(uint16_t * ptr, uint16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3q_u16",
      "full name": "void vst3q_u16(uint16_t * ptr, uint16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3_u32",
      "full name": "void vst3_u32(uint32_t * ptr, uint32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3q_u32",
      "full name": "void vst3q_u32(uint32_t * ptr, uint32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3_f16",
      "full name": "void vst3_f16(float16_t * ptr, float16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3q_f16",
      "full name": "void vst3q_f16(float16_t * ptr, float16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3_f32",
      "full name": "void vst3_f32(float32_t * ptr, float32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3q_f32",
      "full name": "void vst3q_f32(float32_t * ptr, float32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst3_p8",
      "full name": "void vst3_p8(poly8_t * ptr, poly8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3q_p8",
      "full name": "void vst3q_p8(poly8_t * ptr, poly8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst3_p16",
      "full name": "void vst3_p16(poly16_t * ptr, poly16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3q_p16",
      "full name": "void vst3q_p16(poly16_t * ptr, poly16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst3_s64",
      "full name": "void vst3_s64(int64_t * ptr, int64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3_u64",
      "full name": "void vst3_u64(uint64_t * ptr, uint64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3_p64",
      "full name": "void vst3_p64(poly64_t * ptr, poly64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3q_s64",
      "full name": "void vst3q_s64(int64_t * ptr, int64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3q_u64",
      "full name": "void vst3q_u64(uint64_t * ptr, uint64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3q_p64",
      "full name": "void vst3q_p64(poly64_t * ptr, poly64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3_f64",
      "full name": "void vst3_f64(float64_t * ptr, float64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst3q_f64",
      "full name": "void vst3q_f64(float64_t * ptr, float64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4_s8",
      "full name": "void vst4_s8(int8_t * ptr, int8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4q_s8",
      "full name": "void vst4q_s8(int8_t * ptr, int8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4_s16",
      "full name": "void vst4_s16(int16_t * ptr, int16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4q_s16",
      "full name": "void vst4q_s16(int16_t * ptr, int16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4_s32",
      "full name": "void vst4_s32(int32_t * ptr, int32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4q_s32",
      "full name": "void vst4q_s32(int32_t * ptr, int32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4_u8",
      "full name": "void vst4_u8(uint8_t * ptr, uint8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4q_u8",
      "full name": "void vst4q_u8(uint8_t * ptr, uint8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4_u16",
      "full name": "void vst4_u16(uint16_t * ptr, uint16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4q_u16",
      "full name": "void vst4q_u16(uint16_t * ptr, uint16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4_u32",
      "full name": "void vst4_u32(uint32_t * ptr, uint32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4q_u32",
      "full name": "void vst4q_u32(uint32_t * ptr, uint32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4_f16",
      "full name": "void vst4_f16(float16_t * ptr, float16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4q_f16",
      "full name": "void vst4q_f16(float16_t * ptr, float16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4_f32",
      "full name": "void vst4_f32(float32_t * ptr, float32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4q_f32",
      "full name": "void vst4q_f32(float32_t * ptr, float32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [32]"
    },
    {
      "name": "vst4_p8",
      "full name": "void vst4_p8(poly8_t * ptr, poly8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4q_p8",
      "full name": "void vst4q_p8(poly8_t * ptr, poly8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [8]"
    },
    {
      "name": "vst4_p16",
      "full name": "void vst4_p16(poly16_t * ptr, poly16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4q_p16",
      "full name": "void vst4q_p16(poly16_t * ptr, poly16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [16]"
    },
    {
      "name": "vst4_s64",
      "full name": "void vst4_s64(int64_t * ptr, int64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4_u64",
      "full name": "void vst4_u64(uint64_t * ptr, uint64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4_p64",
      "full name": "void vst4_p64(poly64_t * ptr, poly64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4q_s64",
      "full name": "void vst4q_s64(int64_t * ptr, int64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4q_u64",
      "full name": "void vst4q_u64(uint64_t * ptr, uint64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4q_p64",
      "full name": "void vst4q_p64(poly64_t * ptr, poly64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4_f64",
      "full name": "void vst4_f64(float64_t * ptr, float64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vst4q_f64",
      "full name": "void vst4q_f64(float64_t * ptr, float64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中的内容写入指针指向的内存 [64]"
    },
    {
      "name": "vld2_lane_s16",
      "full name": "int16x4x2_t vld2_lane_s16(int16_t const * ptr, int16x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2q_lane_s16",
      "full name": "int16x8x2_t vld2q_lane_s16(int16_t const * ptr, int16x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2_lane_s32",
      "full name": "int32x2x2_t vld2_lane_s32(int32_t const * ptr, int32x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2q_lane_s32",
      "full name": "int32x4x2_t vld2q_lane_s32(int32_t const * ptr, int32x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2_lane_u16",
      "full name": "uint16x4x2_t vld2_lane_u16(uint16_t const * ptr, uint16x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2q_lane_u16",
      "full name": "uint16x8x2_t vld2q_lane_u16(uint16_t const * ptr, uint16x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2_lane_u32",
      "full name": "uint32x2x2_t vld2_lane_u32(uint32_t const * ptr, uint32x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2q_lane_u32",
      "full name": "uint32x4x2_t vld2q_lane_u32(uint32_t const * ptr, uint32x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2_lane_f16",
      "full name": "float16x4x2_t vld2_lane_f16(float16_t const * ptr, float16x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2q_lane_f16",
      "full name": "float16x8x2_t vld2q_lane_f16(float16_t const * ptr, float16x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2_lane_f32",
      "full name": "float32x2x2_t vld2_lane_f32(float32_t const * ptr, float32x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2q_lane_f32",
      "full name": "float32x4x2_t vld2q_lane_f32(float32_t const * ptr, float32x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [32]"
    },
    {
      "name": "vld2_lane_p16",
      "full name": "poly16x4x2_t vld2_lane_p16(poly16_t const * ptr, poly16x4x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2q_lane_p16",
      "full name": "poly16x8x2_t vld2q_lane_p16(poly16_t const * ptr, poly16x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [16]"
    },
    {
      "name": "vld2_lane_s8",
      "full name": "int8x8x2_t vld2_lane_s8(int8_t const * ptr, int8x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2_lane_u8",
      "full name": "uint8x8x2_t vld2_lane_u8(uint8_t const * ptr, uint8x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2_lane_p8",
      "full name": "poly8x8x2_t vld2_lane_p8(poly8_t const * ptr, poly8x8x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2q_lane_s8",
      "full name": "int8x16x2_t vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2q_lane_u8",
      "full name": "uint8x16x2_t vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2q_lane_p8",
      "full name": "poly8x16x2_t vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [8]"
    },
    {
      "name": "vld2_lane_s64",
      "full name": "int64x1x2_t vld2_lane_s64(int64_t const * ptr, int64x1x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2q_lane_s64",
      "full name": "int64x2x2_t vld2q_lane_s64(int64_t const * ptr, int64x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2_lane_u64",
      "full name": "uint64x1x2_t vld2_lane_u64(uint64_t const * ptr, uint64x1x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2q_lane_u64",
      "full name": "uint64x2x2_t vld2q_lane_u64(uint64_t const * ptr, uint64x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2_lane_p64",
      "full name": "poly64x1x2_t vld2_lane_p64(poly64_t const * ptr, poly64x1x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2q_lane_p64",
      "full name": "poly64x2x2_t vld2q_lane_p64(poly64_t const * ptr, poly64x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2_lane_f64",
      "full name": "float64x1x2_t vld2_lane_f64(float64_t const * ptr, float64x1x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld2q_lane_f64",
      "full name": "float64x2x2_t vld2q_lane_f64(float64_t const * ptr, float64x2x2_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld2",
      "function_en": "[vector] ld2 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入2个向量中指定的元素 [64]"
    },
    {
      "name": "vld3_lane_s16",
      "full name": "int16x4x3_t vld3_lane_s16(int16_t const * ptr, int16x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3q_lane_s16",
      "full name": "int16x8x3_t vld3q_lane_s16(int16_t const * ptr, int16x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3_lane_s32",
      "full name": "int32x2x3_t vld3_lane_s32(int32_t const * ptr, int32x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3q_lane_s32",
      "full name": "int32x4x3_t vld3q_lane_s32(int32_t const * ptr, int32x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3_lane_u16",
      "full name": "uint16x4x3_t vld3_lane_u16(uint16_t const * ptr, uint16x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3q_lane_u16",
      "full name": "uint16x8x3_t vld3q_lane_u16(uint16_t const * ptr, uint16x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3_lane_u32",
      "full name": "uint32x2x3_t vld3_lane_u32(uint32_t const * ptr, uint32x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3q_lane_u32",
      "full name": "uint32x4x3_t vld3q_lane_u32(uint32_t const * ptr, uint32x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3_lane_f16",
      "full name": "float16x4x3_t vld3_lane_f16(float16_t const * ptr, float16x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3q_lane_f16",
      "full name": "float16x8x3_t vld3q_lane_f16(float16_t const * ptr, float16x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3_lane_f32",
      "full name": "float32x2x3_t vld3_lane_f32(float32_t const * ptr, float32x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3q_lane_f32",
      "full name": "float32x4x3_t vld3q_lane_f32(float32_t const * ptr, float32x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [32]"
    },
    {
      "name": "vld3_lane_p16",
      "full name": "poly16x4x3_t vld3_lane_p16(poly16_t const * ptr, poly16x4x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3q_lane_p16",
      "full name": "poly16x8x3_t vld3q_lane_p16(poly16_t const * ptr, poly16x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [16]"
    },
    {
      "name": "vld3_lane_s8",
      "full name": "int8x8x3_t vld3_lane_s8(int8_t const * ptr, int8x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3_lane_u8",
      "full name": "uint8x8x3_t vld3_lane_u8(uint8_t const * ptr, uint8x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3_lane_p8",
      "full name": "poly8x8x3_t vld3_lane_p8(poly8_t const * ptr, poly8x8x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3q_lane_s8",
      "full name": "int8x16x3_t vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3q_lane_u8",
      "full name": "uint8x16x3_t vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3q_lane_p8",
      "full name": "poly8x16x3_t vld3q_lane_p8(poly8_t const * ptr, poly8x16x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [8]"
    },
    {
      "name": "vld3_lane_s64",
      "full name": "int64x1x3_t vld3_lane_s64(int64_t const * ptr, int64x1x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3q_lane_s64",
      "full name": "int64x2x3_t vld3q_lane_s64(int64_t const * ptr, int64x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3_lane_u64",
      "full name": "uint64x1x3_t vld3_lane_u64(uint64_t const * ptr, uint64x1x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3q_lane_u64",
      "full name": "uint64x2x3_t vld3q_lane_u64(uint64_t const * ptr, uint64x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3_lane_p64",
      "full name": "poly64x1x3_t vld3_lane_p64(poly64_t const * ptr, poly64x1x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3q_lane_p64",
      "full name": "poly64x2x3_t vld3q_lane_p64(poly64_t const * ptr, poly64x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3_lane_f64",
      "full name": "float64x1x3_t vld3_lane_f64(float64_t const * ptr, float64x1x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld3q_lane_f64",
      "full name": "float64x2x3_t vld3q_lane_f64(float64_t const * ptr, float64x2x3_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld3",
      "function_en": "[vector] ld3 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入3个向量中指定的元素 [64]"
    },
    {
      "name": "vld4_lane_s16",
      "full name": "int16x4x4_t vld4_lane_s16(int16_t const * ptr, int16x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4q_lane_s16",
      "full name": "int16x8x4_t vld4q_lane_s16(int16_t const * ptr, int16x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4_lane_s32",
      "full name": "int32x2x4_t vld4_lane_s32(int32_t const * ptr, int32x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4q_lane_s32",
      "full name": "int32x4x4_t vld4q_lane_s32(int32_t const * ptr, int32x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4_lane_u16",
      "full name": "uint16x4x4_t vld4_lane_u16(uint16_t const * ptr, uint16x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4q_lane_u16",
      "full name": "uint16x8x4_t vld4q_lane_u16(uint16_t const * ptr, uint16x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4_lane_u32",
      "full name": "uint32x2x4_t vld4_lane_u32(uint32_t const * ptr, uint32x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4q_lane_u32",
      "full name": "uint32x4x4_t vld4q_lane_u32(uint32_t const * ptr, uint32x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4_lane_f16",
      "full name": "float16x4x4_t vld4_lane_f16(float16_t const * ptr, float16x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4q_lane_f16",
      "full name": "float16x8x4_t vld4q_lane_f16(float16_t const * ptr, float16x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4_lane_f32",
      "full name": "float32x2x4_t vld4_lane_f32(float32_t const * ptr, float32x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4q_lane_f32",
      "full name": "float32x4x4_t vld4q_lane_f32(float32_t const * ptr, float32x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [32]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [32]"
    },
    {
      "name": "vld4_lane_p16",
      "full name": "poly16x4x4_t vld4_lane_p16(poly16_t const * ptr, poly16x4x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4q_lane_p16",
      "full name": "poly16x8x4_t vld4q_lane_p16(poly16_t const * ptr, poly16x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [16]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [16]"
    },
    {
      "name": "vld4_lane_s8",
      "full name": "int8x8x4_t vld4_lane_s8(int8_t const * ptr, int8x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4_lane_u8",
      "full name": "uint8x8x4_t vld4_lane_u8(uint8_t const * ptr, uint8x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4_lane_p8",
      "full name": "poly8x8x4_t vld4_lane_p8(poly8_t const * ptr, poly8x8x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4q_lane_s8",
      "full name": "int8x16x4_t vld4q_lane_s8(int8_t const * ptr, int8x16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4q_lane_u8",
      "full name": "uint8x16x4_t vld4q_lane_u8(uint8_t const * ptr, uint8x16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4q_lane_p8",
      "full name": "poly8x16x4_t vld4q_lane_p8(poly8_t const * ptr, poly8x16x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [8]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [8]"
    },
    {
      "name": "vld4_lane_s64",
      "full name": "int64x1x4_t vld4_lane_s64(int64_t const * ptr, int64x1x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4q_lane_s64",
      "full name": "int64x2x4_t vld4q_lane_s64(int64_t const * ptr, int64x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4_lane_u64",
      "full name": "uint64x1x4_t vld4_lane_u64(uint64_t const * ptr, uint64x1x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4q_lane_u64",
      "full name": "uint64x2x4_t vld4q_lane_u64(uint64_t const * ptr, uint64x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4_lane_p64",
      "full name": "poly64x1x4_t vld4_lane_p64(poly64_t const * ptr, poly64x1x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4q_lane_p64",
      "full name": "poly64x2x4_t vld4q_lane_p64(poly64_t const * ptr, poly64x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4_lane_f64",
      "full name": "float64x1x4_t vld4_lane_f64(float64_t const * ptr, float64x1x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vld4q_lane_f64",
      "full name": "float64x2x4_t vld4q_lane_f64(float64_t const * ptr, float64x2x4_t src, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld4",
      "function_en": "[vector] ld4 [64]",
      "function_cn": "[向量] 将指针指向的内存中的数据写入4个向量中指定的元素 [64]"
    },
    {
      "name": "vst2_lane_s8",
      "full name": "void vst2_lane_s8(int8_t * ptr, int8x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2_lane_u8",
      "full name": "void vst2_lane_u8(uint8_t * ptr, uint8x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2_lane_p8",
      "full name": "void vst2_lane_p8(poly8_t * ptr, poly8x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3_lane_s8",
      "full name": "void vst3_lane_s8(int8_t * ptr, int8x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3_lane_u8",
      "full name": "void vst3_lane_u8(uint8_t * ptr, uint8x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3_lane_p8",
      "full name": "void vst3_lane_p8(poly8_t * ptr, poly8x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4_lane_s8",
      "full name": "void vst4_lane_s8(int8_t * ptr, int8x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4_lane_u8",
      "full name": "void vst4_lane_u8(uint8_t * ptr, uint8x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4_lane_p8",
      "full name": "void vst4_lane_p8(poly8_t * ptr, poly8x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2_lane_s16",
      "full name": "void vst2_lane_s16(int16_t * ptr, int16x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2q_lane_s16",
      "full name": "void vst2q_lane_s16(int16_t * ptr, int16x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2_lane_s32",
      "full name": "void vst2_lane_s32(int32_t * ptr, int32x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2q_lane_s32",
      "full name": "void vst2q_lane_s32(int32_t * ptr, int32x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2_lane_u16",
      "full name": "void vst2_lane_u16(uint16_t * ptr, uint16x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2q_lane_u16",
      "full name": "void vst2q_lane_u16(uint16_t * ptr, uint16x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2_lane_u32",
      "full name": "void vst2_lane_u32(uint32_t * ptr, uint32x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2q_lane_u32",
      "full name": "void vst2q_lane_u32(uint32_t * ptr, uint32x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2_lane_f16",
      "full name": "void vst2_lane_f16(float16_t * ptr, float16x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2q_lane_f16",
      "full name": "void vst2q_lane_f16(float16_t * ptr, float16x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2_lane_f32",
      "full name": "void vst2_lane_f32(float32_t * ptr, float32x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2q_lane_f32",
      "full name": "void vst2q_lane_f32(float32_t * ptr, float32x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [32]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst2_lane_p16",
      "full name": "void vst2_lane_p16(poly16_t * ptr, poly16x4x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2q_lane_p16",
      "full name": "void vst2q_lane_p16(poly16_t * ptr, poly16x8x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [16]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst2q_lane_s8",
      "full name": "void vst2q_lane_s8(int8_t * ptr, int8x16x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2q_lane_u8",
      "full name": "void vst2q_lane_u8(uint8_t * ptr, uint8x16x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2q_lane_p8",
      "full name": "void vst2q_lane_p8(poly8_t * ptr, poly8x16x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [8]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst2_lane_s64",
      "full name": "void vst2_lane_s64(int64_t * ptr, int64x1x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2q_lane_s64",
      "full name": "void vst2q_lane_s64(int64_t * ptr, int64x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2_lane_u64",
      "full name": "void vst2_lane_u64(uint64_t * ptr, uint64x1x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2q_lane_u64",
      "full name": "void vst2q_lane_u64(uint64_t * ptr, uint64x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2_lane_p64",
      "full name": "void vst2_lane_p64(poly64_t * ptr, poly64x1x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2q_lane_p64",
      "full name": "void vst2q_lane_p64(poly64_t * ptr, poly64x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2_lane_f64",
      "full name": "void vst2_lane_f64(float64_t * ptr, float64x1x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst2q_lane_f64",
      "full name": "void vst2q_lane_f64(float64_t * ptr, float64x2x2_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st2",
      "function_en": "[vector] st2 [64]",
      "function_cn": "[向量] 将2个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3_lane_s16",
      "full name": "void vst3_lane_s16(int16_t * ptr, int16x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3q_lane_s16",
      "full name": "void vst3q_lane_s16(int16_t * ptr, int16x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3_lane_s32",
      "full name": "void vst3_lane_s32(int32_t * ptr, int32x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3q_lane_s32",
      "full name": "void vst3q_lane_s32(int32_t * ptr, int32x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3_lane_u16",
      "full name": "void vst3_lane_u16(uint16_t * ptr, uint16x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3q_lane_u16",
      "full name": "void vst3q_lane_u16(uint16_t * ptr, uint16x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3_lane_u32",
      "full name": "void vst3_lane_u32(uint32_t * ptr, uint32x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3q_lane_u32",
      "full name": "void vst3q_lane_u32(uint32_t * ptr, uint32x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3_lane_f16",
      "full name": "void vst3_lane_f16(float16_t * ptr, float16x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3q_lane_f16",
      "full name": "void vst3q_lane_f16(float16_t * ptr, float16x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3_lane_f32",
      "full name": "void vst3_lane_f32(float32_t * ptr, float32x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3q_lane_f32",
      "full name": "void vst3q_lane_f32(float32_t * ptr, float32x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [32]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst3_lane_p16",
      "full name": "void vst3_lane_p16(poly16_t * ptr, poly16x4x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3q_lane_p16",
      "full name": "void vst3q_lane_p16(poly16_t * ptr, poly16x8x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [16]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst3q_lane_s8",
      "full name": "void vst3q_lane_s8(int8_t * ptr, int8x16x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3q_lane_u8",
      "full name": "void vst3q_lane_u8(uint8_t * ptr, uint8x16x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3q_lane_p8",
      "full name": "void vst3q_lane_p8(poly8_t * ptr, poly8x16x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [8]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst3_lane_s64",
      "full name": "void vst3_lane_s64(int64_t * ptr, int64x1x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3q_lane_s64",
      "full name": "void vst3q_lane_s64(int64_t * ptr, int64x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3_lane_u64",
      "full name": "void vst3_lane_u64(uint64_t * ptr, uint64x1x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3q_lane_u64",
      "full name": "void vst3q_lane_u64(uint64_t * ptr, uint64x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3_lane_p64",
      "full name": "void vst3_lane_p64(poly64_t * ptr, poly64x1x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3q_lane_p64",
      "full name": "void vst3q_lane_p64(poly64_t * ptr, poly64x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3_lane_f64",
      "full name": "void vst3_lane_f64(float64_t * ptr, float64x1x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst3q_lane_f64",
      "full name": "void vst3q_lane_f64(float64_t * ptr, float64x2x3_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st3",
      "function_en": "[vector] st3 [64]",
      "function_cn": "[向量] 将3个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4_lane_s16",
      "full name": "void vst4_lane_s16(int16_t * ptr, int16x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4q_lane_s16",
      "full name": "void vst4q_lane_s16(int16_t * ptr, int16x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4_lane_s32",
      "full name": "void vst4_lane_s32(int32_t * ptr, int32x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4q_lane_s32",
      "full name": "void vst4q_lane_s32(int32_t * ptr, int32x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4_lane_u16",
      "full name": "void vst4_lane_u16(uint16_t * ptr, uint16x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4q_lane_u16",
      "full name": "void vst4q_lane_u16(uint16_t * ptr, uint16x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4_lane_u32",
      "full name": "void vst4_lane_u32(uint32_t * ptr, uint32x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4q_lane_u32",
      "full name": "void vst4q_lane_u32(uint32_t * ptr, uint32x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4_lane_f16",
      "full name": "void vst4_lane_f16(float16_t * ptr, float16x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4q_lane_f16",
      "full name": "void vst4q_lane_f16(float16_t * ptr, float16x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4_lane_f32",
      "full name": "void vst4_lane_f32(float32_t * ptr, float32x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4q_lane_f32",
      "full name": "void vst4q_lane_f32(float32_t * ptr, float32x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [32]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [32]"
    },
    {
      "name": "vst4_lane_p16",
      "full name": "void vst4_lane_p16(poly16_t * ptr, poly16x4x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4q_lane_p16",
      "full name": "void vst4q_lane_p16(poly16_t * ptr, poly16x8x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [16]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [16]"
    },
    {
      "name": "vst4q_lane_s8",
      "full name": "void vst4q_lane_s8(int8_t * ptr, int8x16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4q_lane_u8",
      "full name": "void vst4q_lane_u8(uint8_t * ptr, uint8x16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4q_lane_p8",
      "full name": "void vst4q_lane_p8(poly8_t * ptr, poly8x16x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [8]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [8]"
    },
    {
      "name": "vst4_lane_s64",
      "full name": "void vst4_lane_s64(int64_t * ptr, int64x1x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4q_lane_s64",
      "full name": "void vst4q_lane_s64(int64_t * ptr, int64x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4_lane_u64",
      "full name": "void vst4_lane_u64(uint64_t * ptr, uint64x1x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4q_lane_u64",
      "full name": "void vst4q_lane_u64(uint64_t * ptr, uint64x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4_lane_p64",
      "full name": "void vst4_lane_p64(poly64_t * ptr, poly64x1x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4q_lane_p64",
      "full name": "void vst4q_lane_p64(poly64_t * ptr, poly64x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4_lane_f64",
      "full name": "void vst4_lane_f64(float64_t * ptr, float64x1x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst4q_lane_f64",
      "full name": "void vst4q_lane_f64(float64_t * ptr, float64x2x4_t val, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st4",
      "function_en": "[vector] st4 [64]",
      "function_cn": "[向量] 将4个向量寄存器中指定的元素内容写入指针指向的内存中 [64]"
    },
    {
      "name": "vst1_s8_x2",
      "full name": "void vst1_s8_x2(int8_t * ptr, int8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_s8_x2",
      "full name": "void vst1q_s8_x2(int8_t * ptr, int8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_s16_x2",
      "full name": "void vst1_s16_x2(int16_t * ptr, int16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_s16_x2",
      "full name": "void vst1q_s16_x2(int16_t * ptr, int16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s32_x2",
      "full name": "void vst1_s32_x2(int32_t * ptr, int32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_s32_x2",
      "full name": "void vst1q_s32_x2(int32_t * ptr, int32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_u8_x2",
      "full name": "void vst1_u8_x2(uint8_t * ptr, uint8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_u8_x2",
      "full name": "void vst1q_u8_x2(uint8_t * ptr, uint8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_u16_x2",
      "full name": "void vst1_u16_x2(uint16_t * ptr, uint16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_u16_x2",
      "full name": "void vst1q_u16_x2(uint16_t * ptr, uint16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_u32_x2",
      "full name": "void vst1_u32_x2(uint32_t * ptr, uint32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_u32_x2",
      "full name": "void vst1q_u32_x2(uint32_t * ptr, uint32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_f16_x2",
      "full name": "void vst1_f16_x2(float16_t * ptr, float16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_f16_x2",
      "full name": "void vst1q_f16_x2(float16_t * ptr, float16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_f32_x2",
      "full name": "void vst1_f32_x2(float32_t * ptr, float32x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_f32_x2",
      "full name": "void vst1q_f32_x2(float32_t * ptr, float32x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_p8_x2",
      "full name": "void vst1_p8_x2(poly8_t * ptr, poly8x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_p8_x2",
      "full name": "void vst1q_p8_x2(poly8_t * ptr, poly8x16x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_p16_x2",
      "full name": "void vst1_p16_x2(poly16_t * ptr, poly16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_p16_x2",
      "full name": "void vst1q_p16_x2(poly16_t * ptr, poly16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s64_x2",
      "full name": "void vst1_s64_x2(int64_t * ptr, int64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_u64_x2",
      "full name": "void vst1_u64_x2(uint64_t * ptr, uint64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_p64_x2",
      "full name": "void vst1_p64_x2(poly64_t * ptr, poly64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_s64_x2",
      "full name": "void vst1q_s64_x2(int64_t * ptr, int64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_u64_x2",
      "full name": "void vst1q_u64_x2(uint64_t * ptr, uint64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_p64_x2",
      "full name": "void vst1q_p64_x2(poly64_t * ptr, poly64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_f64_x2",
      "full name": "void vst1_f64_x2(float64_t * ptr, float64x1x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_f64_x2",
      "full name": "void vst1q_f64_x2(float64_t * ptr, float64x2x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_s8_x3",
      "full name": "void vst1_s8_x3(int8_t * ptr, int8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_s8_x3",
      "full name": "void vst1q_s8_x3(int8_t * ptr, int8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_s16_x3",
      "full name": "void vst1_s16_x3(int16_t * ptr, int16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_s16_x3",
      "full name": "void vst1q_s16_x3(int16_t * ptr, int16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s32_x3",
      "full name": "void vst1_s32_x3(int32_t * ptr, int32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_s32_x3",
      "full name": "void vst1q_s32_x3(int32_t * ptr, int32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_u8_x3",
      "full name": "void vst1_u8_x3(uint8_t * ptr, uint8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_u8_x3",
      "full name": "void vst1q_u8_x3(uint8_t * ptr, uint8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_u16_x3",
      "full name": "void vst1_u16_x3(uint16_t * ptr, uint16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_u16_x3",
      "full name": "void vst1q_u16_x3(uint16_t * ptr, uint16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_u32_x3",
      "full name": "void vst1_u32_x3(uint32_t * ptr, uint32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_u32_x3",
      "full name": "void vst1q_u32_x3(uint32_t * ptr, uint32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_f16_x3",
      "full name": "void vst1_f16_x3(float16_t * ptr, float16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_f16_x3",
      "full name": "void vst1q_f16_x3(float16_t * ptr, float16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_f32_x3",
      "full name": "void vst1_f32_x3(float32_t * ptr, float32x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_f32_x3",
      "full name": "void vst1q_f32_x3(float32_t * ptr, float32x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_p8_x3",
      "full name": "void vst1_p8_x3(poly8_t * ptr, poly8x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_p8_x3",
      "full name": "void vst1q_p8_x3(poly8_t * ptr, poly8x16x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_p16_x3",
      "full name": "void vst1_p16_x3(poly16_t * ptr, poly16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_p16_x3",
      "full name": "void vst1q_p16_x3(poly16_t * ptr, poly16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s64_x3",
      "full name": "void vst1_s64_x3(int64_t * ptr, int64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_u64_x3",
      "full name": "void vst1_u64_x3(uint64_t * ptr, uint64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_p64_x3",
      "full name": "void vst1_p64_x3(poly64_t * ptr, poly64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_s64_x3",
      "full name": "void vst1q_s64_x3(int64_t * ptr, int64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_u64_x3",
      "full name": "void vst1q_u64_x3(uint64_t * ptr, uint64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_p64_x3",
      "full name": "void vst1q_p64_x3(poly64_t * ptr, poly64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_f64_x3",
      "full name": "void vst1_f64_x3(float64_t * ptr, float64x1x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_f64_x3",
      "full name": "void vst1q_f64_x3(float64_t * ptr, float64x2x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_s8_x4",
      "full name": "void vst1_s8_x4(int8_t * ptr, int8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_s8_x4",
      "full name": "void vst1q_s8_x4(int8_t * ptr, int8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_s16_x4",
      "full name": "void vst1_s16_x4(int16_t * ptr, int16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_s16_x4",
      "full name": "void vst1q_s16_x4(int16_t * ptr, int16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s32_x4",
      "full name": "void vst1_s32_x4(int32_t * ptr, int32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_s32_x4",
      "full name": "void vst1q_s32_x4(int32_t * ptr, int32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_u8_x4",
      "full name": "void vst1_u8_x4(uint8_t * ptr, uint8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_u8_x4",
      "full name": "void vst1q_u8_x4(uint8_t * ptr, uint8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_u16_x4",
      "full name": "void vst1_u16_x4(uint16_t * ptr, uint16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_u16_x4",
      "full name": "void vst1q_u16_x4(uint16_t * ptr, uint16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_u32_x4",
      "full name": "void vst1_u32_x4(uint32_t * ptr, uint32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_u32_x4",
      "full name": "void vst1q_u32_x4(uint32_t * ptr, uint32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_f16_x4",
      "full name": "void vst1_f16_x4(float16_t * ptr, float16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_f16_x4",
      "full name": "void vst1q_f16_x4(float16_t * ptr, float16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_f32_x4",
      "full name": "void vst1_f32_x4(float32_t * ptr, float32x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1q_f32_x4",
      "full name": "void vst1q_f32_x4(float32_t * ptr, float32x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [32]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [32]"
    },
    {
      "name": "vst1_p8_x4",
      "full name": "void vst1_p8_x4(poly8_t * ptr, poly8x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1q_p8_x4",
      "full name": "void vst1q_p8_x4(poly8_t * ptr, poly8x16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [8]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [8]"
    },
    {
      "name": "vst1_p16_x4",
      "full name": "void vst1_p16_x4(poly16_t * ptr, poly16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1q_p16_x4",
      "full name": "void vst1q_p16_x4(poly16_t * ptr, poly16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [16]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [16]"
    },
    {
      "name": "vst1_s64_x4",
      "full name": "void vst1_s64_x4(int64_t * ptr, int64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_u64_x4",
      "full name": "void vst1_u64_x4(uint64_t * ptr, uint64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_p64_x4",
      "full name": "void vst1_p64_x4(poly64_t * ptr, poly64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_s64_x4",
      "full name": "void vst1q_s64_x4(int64_t * ptr, int64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_u64_x4",
      "full name": "void vst1q_u64_x4(uint64_t * ptr, uint64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_p64_x4",
      "full name": "void vst1q_p64_x4(poly64_t * ptr, poly64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1_f64_x4",
      "full name": "void vst1_f64_x4(float64_t * ptr, float64x1x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vst1q_f64_x4",
      "full name": "void vst1q_f64_x4(float64_t * ptr, float64x2x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "st1",
      "function_en": "[vector] st1 [64]",
      "function_cn": "[向量] 将向量寄存器中的内容存储到指针指向的内存 [64]"
    },
    {
      "name": "vld1_s8_x2",
      "full name": "int8x8x2_t vld1_s8_x2(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_s8_x2",
      "full name": "int8x16x2_t vld1q_s8_x2(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_s16_x2",
      "full name": "int16x4x2_t vld1_s16_x2(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_s16_x2",
      "full name": "int16x8x2_t vld1q_s16_x2(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s32_x2",
      "full name": "int32x2x2_t vld1_s32_x2(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_s32_x2",
      "full name": "int32x4x2_t vld1q_s32_x2(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_u8_x2",
      "full name": "uint8x8x2_t vld1_u8_x2(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_u8_x2",
      "full name": "uint8x16x2_t vld1q_u8_x2(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_u16_x2",
      "full name": "uint16x4x2_t vld1_u16_x2(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_u16_x2",
      "full name": "uint16x8x2_t vld1q_u16_x2(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_u32_x2",
      "full name": "uint32x2x2_t vld1_u32_x2(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_u32_x2",
      "full name": "uint32x4x2_t vld1q_u32_x2(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_f16_x2",
      "full name": "float16x4x2_t vld1_f16_x2(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_f16_x2",
      "full name": "float16x8x2_t vld1q_f16_x2(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_f32_x2",
      "full name": "float32x2x2_t vld1_f32_x2(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_f32_x2",
      "full name": "float32x4x2_t vld1q_f32_x2(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_p8_x2",
      "full name": "poly8x8x2_t vld1_p8_x2(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_p8_x2",
      "full name": "poly8x16x2_t vld1q_p8_x2(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_p16_x2",
      "full name": "poly16x4x2_t vld1_p16_x2(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_p16_x2",
      "full name": "poly16x8x2_t vld1q_p16_x2(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s64_x2",
      "full name": "int64x1x2_t vld1_s64_x2(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_u64_x2",
      "full name": "uint64x1x2_t vld1_u64_x2(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_p64_x2",
      "full name": "poly64x1x2_t vld1_p64_x2(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_s64_x2",
      "full name": "int64x2x2_t vld1q_s64_x2(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_u64_x2",
      "full name": "uint64x2x2_t vld1q_u64_x2(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_p64_x2",
      "full name": "poly64x2x2_t vld1q_p64_x2(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_f64_x2",
      "full name": "float64x1x2_t vld1_f64_x2(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_f64_x2",
      "full name": "float64x2x2_t vld1q_f64_x2(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_s8_x3",
      "full name": "int8x8x3_t vld1_s8_x3(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_s8_x3",
      "full name": "int8x16x3_t vld1q_s8_x3(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_s16_x3",
      "full name": "int16x4x3_t vld1_s16_x3(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_s16_x3",
      "full name": "int16x8x3_t vld1q_s16_x3(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s32_x3",
      "full name": "int32x2x3_t vld1_s32_x3(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_s32_x3",
      "full name": "int32x4x3_t vld1q_s32_x3(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_u8_x3",
      "full name": "uint8x8x3_t vld1_u8_x3(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_u8_x3",
      "full name": "uint8x16x3_t vld1q_u8_x3(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_u16_x3",
      "full name": "uint16x4x3_t vld1_u16_x3(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_u16_x3",
      "full name": "uint16x8x3_t vld1q_u16_x3(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_u32_x3",
      "full name": "uint32x2x3_t vld1_u32_x3(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_u32_x3",
      "full name": "uint32x4x3_t vld1q_u32_x3(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_f16_x3",
      "full name": "float16x4x3_t vld1_f16_x3(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_f16_x3",
      "full name": "float16x8x3_t vld1q_f16_x3(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_f32_x3",
      "full name": "float32x2x3_t vld1_f32_x3(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_f32_x3",
      "full name": "float32x4x3_t vld1q_f32_x3(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_p8_x3",
      "full name": "poly8x8x3_t vld1_p8_x3(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_p8_x3",
      "full name": "poly8x16x3_t vld1q_p8_x3(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_p16_x3",
      "full name": "poly16x4x3_t vld1_p16_x3(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_p16_x3",
      "full name": "poly16x8x3_t vld1q_p16_x3(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s64_x3",
      "full name": "int64x1x3_t vld1_s64_x3(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_u64_x3",
      "full name": "uint64x1x3_t vld1_u64_x3(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_p64_x3",
      "full name": "poly64x1x3_t vld1_p64_x3(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_s64_x3",
      "full name": "int64x2x3_t vld1q_s64_x3(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_u64_x3",
      "full name": "uint64x2x3_t vld1q_u64_x3(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_p64_x3",
      "full name": "poly64x2x3_t vld1q_p64_x3(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_f64_x3",
      "full name": "float64x1x3_t vld1_f64_x3(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_f64_x3",
      "full name": "float64x2x3_t vld1q_f64_x3(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_s8_x4",
      "full name": "int8x8x4_t vld1_s8_x4(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_s8_x4",
      "full name": "int8x16x4_t vld1q_s8_x4(int8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_s16_x4",
      "full name": "int16x4x4_t vld1_s16_x4(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_s16_x4",
      "full name": "int16x8x4_t vld1q_s16_x4(int16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s32_x4",
      "full name": "int32x2x4_t vld1_s32_x4(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_s32_x4",
      "full name": "int32x4x4_t vld1q_s32_x4(int32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_u8_x4",
      "full name": "uint8x8x4_t vld1_u8_x4(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_u8_x4",
      "full name": "uint8x16x4_t vld1q_u8_x4(uint8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_u16_x4",
      "full name": "uint16x4x4_t vld1_u16_x4(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_u16_x4",
      "full name": "uint16x8x4_t vld1q_u16_x4(uint16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_u32_x4",
      "full name": "uint32x2x4_t vld1_u32_x4(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_u32_x4",
      "full name": "uint32x4x4_t vld1q_u32_x4(uint32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_f16_x4",
      "full name": "float16x4x4_t vld1_f16_x4(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_f16_x4",
      "full name": "float16x8x4_t vld1q_f16_x4(float16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_f32_x4",
      "full name": "float32x2x4_t vld1_f32_x4(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1q_f32_x4",
      "full name": "float32x4x4_t vld1q_f32_x4(float32_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [32]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [32]"
    },
    {
      "name": "vld1_p8_x4",
      "full name": "poly8x8x4_t vld1_p8_x4(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1q_p8_x4",
      "full name": "poly8x16x4_t vld1q_p8_x4(poly8_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [8]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [8]"
    },
    {
      "name": "vld1_p16_x4",
      "full name": "poly16x4x4_t vld1_p16_x4(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1q_p16_x4",
      "full name": "poly16x8x4_t vld1q_p16_x4(poly16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [16]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [16]"
    },
    {
      "name": "vld1_s64_x4",
      "full name": "int64x1x4_t vld1_s64_x4(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_u64_x4",
      "full name": "uint64x1x4_t vld1_u64_x4(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_p64_x4",
      "full name": "poly64x1x4_t vld1_p64_x4(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_s64_x4",
      "full name": "int64x2x4_t vld1q_s64_x4(int64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_u64_x4",
      "full name": "uint64x2x4_t vld1q_u64_x4(uint64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_p64_x4",
      "full name": "poly64x2x4_t vld1q_p64_x4(poly64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1_f64_x4",
      "full name": "float64x1x4_t vld1_f64_x4(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vld1q_f64_x4",
      "full name": "float64x2x4_t vld1q_f64_x4(float64_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ld1",
      "function_en": "[vector] ld1 [64]",
      "function_cn": "[向量] 将指针指向的内存中的内容加载到向量寄存器 [64]"
    },
    {
      "name": "vpadd_s8",
      "full name": "int8x8_t vpadd_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [8]",
      "function_cn": "[向量] 相邻元素相加 [8]"
    },
    {
      "name": "vpadd_s16",
      "full name": "int16x4_t vpadd_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [16]",
      "function_cn": "[向量] 相邻元素相加 [16]"
    },
    {
      "name": "vpadd_s32",
      "full name": "int32x2_t vpadd_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 相邻元素相加 [32]"
    },
    {
      "name": "vpadd_u8",
      "full name": "uint8x8_t vpadd_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [8]",
      "function_cn": "[向量] 相邻元素相加 [8]"
    },
    {
      "name": "vpadd_u16",
      "full name": "uint16x4_t vpadd_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [16]",
      "function_cn": "[向量] 相邻元素相加 [16]"
    },
    {
      "name": "vpadd_u32",
      "full name": "uint32x2_t vpadd_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 相邻元素相加 [32]"
    },
    {
      "name": "vpadd_f32",
      "full name": "float32x2_t vpadd_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [32]",
      "function_cn": "[向量] 浮点类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddq_s8",
      "full name": "int8x16_t vpaddq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [8]",
      "function_cn": "[向量] 相邻元素相加 [8]"
    },
    {
      "name": "vpaddq_s16",
      "full name": "int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [16]",
      "function_cn": "[向量] 相邻元素相加 [16]"
    },
    {
      "name": "vpaddq_s32",
      "full name": "int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 相邻元素相加 [32]"
    },
    {
      "name": "vpaddq_s64",
      "full name": "int64x2_t vpaddq_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 相邻元素相加 [64]"
    },
    {
      "name": "vpaddq_u8",
      "full name": "uint8x16_t vpaddq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [8]",
      "function_cn": "[向量] 相邻元素相加 [8]"
    },
    {
      "name": "vpaddq_u16",
      "full name": "uint16x8_t vpaddq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [16]",
      "function_cn": "[向量] 相邻元素相加 [16]"
    },
    {
      "name": "vpaddq_u32",
      "full name": "uint32x4_t vpaddq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 相邻元素相加 [32]"
    },
    {
      "name": "vpaddq_u64",
      "full name": "uint64x2_t vpaddq_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 相邻元素相加 [64]"
    },
    {
      "name": "vpaddq_f32",
      "full name": "float32x4_t vpaddq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [32]",
      "function_cn": "[向量] 浮点类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddq_f64",
      "full name": "float64x2_t vpaddq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [64]",
      "function_cn": "[向量] 浮点类型相邻元素相加 [64]"
    },
    {
      "name": "vpaddl_s8",
      "full name": "int16x4_t vpaddl_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [8]"
    },
    {
      "name": "vpaddlq_s8",
      "full name": "int16x8_t vpaddlq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [8]"
    },
    {
      "name": "vpaddl_s16",
      "full name": "int32x2_t vpaddl_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [16]"
    },
    {
      "name": "vpaddlq_s16",
      "full name": "int32x4_t vpaddlq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [16]"
    },
    {
      "name": "vpaddl_s32",
      "full name": "int64x1_t vpaddl_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddlq_s32",
      "full name": "int64x2_t vpaddlq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddl_u8",
      "full name": "uint16x4_t vpaddl_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [8]"
    },
    {
      "name": "vpaddlq_u8",
      "full name": "uint16x8_t vpaddlq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [8]"
    },
    {
      "name": "vpaddl_u16",
      "full name": "uint32x2_t vpaddl_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [16]"
    },
    {
      "name": "vpaddlq_u16",
      "full name": "uint32x4_t vpaddlq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [16]"
    },
    {
      "name": "vpaddl_u32",
      "full name": "uint64x1_t vpaddl_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddlq_u32",
      "full name": "uint64x2_t vpaddlq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加 [32]"
    },
    {
      "name": "vpadal_s8",
      "full name": "int16x4_t vpadal_s8(int16x4_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [8]"
    },
    {
      "name": "vpadalq_s8",
      "full name": "int16x8_t vpadalq_s8(int16x8_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [8]"
    },
    {
      "name": "vpadal_s16",
      "full name": "int32x2_t vpadal_s16(int32x2_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [16]"
    },
    {
      "name": "vpadalq_s16",
      "full name": "int32x4_t vpadalq_s16(int32x4_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [16]"
    },
    {
      "name": "vpadal_s32",
      "full name": "int64x1_t vpadal_s32(int64x1_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [32]"
    },
    {
      "name": "vpadalq_s32",
      "full name": "int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sadalp",
      "function_en": "[vector] sadalp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素相加并累加到目标寄存器 [32]"
    },
    {
      "name": "vpadal_u8",
      "full name": "uint16x4_t vpadal_u8(uint16x4_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [8]"
    },
    {
      "name": "vpadalq_u8",
      "full name": "uint16x8_t vpadalq_u8(uint16x8_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [8]"
    },
    {
      "name": "vpadal_u16",
      "full name": "uint32x2_t vpadal_u16(uint32x2_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [16]"
    },
    {
      "name": "vpadalq_u16",
      "full name": "uint32x4_t vpadalq_u16(uint32x4_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [16]"
    },
    {
      "name": "vpadal_u32",
      "full name": "uint64x1_t vpadal_u32(uint64x1_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [32]"
    },
    {
      "name": "vpadalq_u32",
      "full name": "uint64x2_t vpadalq_u32(uint64x2_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uadalp",
      "function_en": "[vector] uadalp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素相加并累加到目标寄存器 [32]"
    },
    {
      "name": "vpmax_s8",
      "full name": "int8x8_t vpmax_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [8]"
    },
    {
      "name": "vpmax_s16",
      "full name": "int16x4_t vpmax_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [16]"
    },
    {
      "name": "vpmax_s32",
      "full name": "int32x2_t vpmax_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmax_u8",
      "full name": "uint8x8_t vpmax_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [8]"
    },
    {
      "name": "vpmax_u16",
      "full name": "uint16x4_t vpmax_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [16]"
    },
    {
      "name": "vpmax_u32",
      "full name": "uint32x2_t vpmax_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmax_f32",
      "full name": "float32x2_t vpmax_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [32]",
      "function_cn": "[向量] 浮点数类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxq_s8",
      "full name": "int8x16_t vpmaxq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [8]"
    },
    {
      "name": "vpmaxq_s16",
      "full name": "int16x8_t vpmaxq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [16]"
    },
    {
      "name": "vpmaxq_s32",
      "full name": "int32x4_t vpmaxq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxq_u8",
      "full name": "uint8x16_t vpmaxq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [8]"
    },
    {
      "name": "vpmaxq_u16",
      "full name": "uint16x8_t vpmaxq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [16]"
    },
    {
      "name": "vpmaxq_u32",
      "full name": "uint32x4_t vpmaxq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxq_f32",
      "full name": "float32x4_t vpmaxq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxq_f64",
      "full name": "float64x2_t vpmaxq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [64]"
    },
    {
      "name": "vpmin_s8",
      "full name": "int8x8_t vpmin_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [8]"
    },
    {
      "name": "vpmin_s16",
      "full name": "int16x4_t vpmin_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [16]"
    },
    {
      "name": "vpmin_s32",
      "full name": "int32x2_t vpmin_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpmin_u8",
      "full name": "uint8x8_t vpmin_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [8]"
    },
    {
      "name": "vpmin_u16",
      "full name": "uint16x4_t vpmin_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [16]"
    },
    {
      "name": "vpmin_u32",
      "full name": "uint32x2_t vpmin_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpmin_f32",
      "full name": "float32x2_t vpmin_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminq_s8",
      "full name": "int8x16_t vpminq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [8]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [8]"
    },
    {
      "name": "vpminq_s16",
      "full name": "int16x8_t vpminq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [16]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [16]"
    },
    {
      "name": "vpminq_s32",
      "full name": "int32x4_t vpminq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [32]",
      "function_cn": "[向量] 有符号整数类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminq_u8",
      "full name": "uint8x16_t vpminq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [8]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [8]"
    },
    {
      "name": "vpminq_u16",
      "full name": "uint16x8_t vpminq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [16]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [16]"
    },
    {
      "name": "vpminq_u32",
      "full name": "uint32x4_t vpminq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [32]",
      "function_cn": "[向量] 无符号整数类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminq_f32",
      "full name": "float32x4_t vpminq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminq_f64",
      "full name": "float64x2_t vpminq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [64]"
    },
    {
      "name": "vpmaxnm_f32",
      "full name": "float32x2_t vpmaxnm_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxnmq_f32",
      "full name": "float32x4_t vpmaxnmq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxnmq_f64",
      "full name": "float64x2_t vpmaxnmq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [64]"
    },
    {
      "name": "vpminnm_f32",
      "full name": "float32x2_t vpminnm_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminnmq_f32",
      "full name": "float32x4_t vpminnmq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminnmq_f64",
      "full name": "float64x2_t vpminnmq_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [64]"
    },
    {
      "name": "vpaddd_s64",
      "full name": "int64_t vpaddd_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 相邻元素相加 [64]"
    },
    {
      "name": "vpaddd_u64",
      "full name": "uint64_t vpaddd_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 相邻元素相加 [64]"
    },
    {
      "name": "vpadds_f32",
      "full name": "float32_t vpadds_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [32]",
      "function_cn": "[向量] 浮点类型相邻元素相加 [32]"
    },
    {
      "name": "vpaddd_f64",
      "full name": "float64_t vpaddd_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [64]",
      "function_cn": "[向量] 浮点类型相邻元素相加 [64]"
    },
    {
      "name": "vpmaxs_f32",
      "full name": "float32_t vpmaxs_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxqd_f64",
      "full name": "float64_t vpmaxqd_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [64]"
    },
    {
      "name": "vpmins_f32",
      "full name": "float32_t vpmins_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminqd_f64",
      "full name": "float64_t vpminqd_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [64]"
    },
    {
      "name": "vpmaxnms_f32",
      "full name": "float32_t vpmaxnms_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [32]"
    },
    {
      "name": "vpmaxnmqd_f64",
      "full name": "float64_t vpmaxnmqd_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较大值 [64]"
    },
    {
      "name": "vpminnms_f32",
      "full name": "float32_t vpminnms_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [32]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [32]"
    },
    {
      "name": "vpminnmqd_f64",
      "full name": "float64_t vpminnmqd_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [64]",
      "function_cn": "[向量] 浮点类型相邻元素取较小值 [64]"
    },
    {
      "name": "vaddv_s8",
      "full name": "int8_t vaddv_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [8]",
      "function_cn": "[向量] 向量全部元素相加 [8]"
    },
    {
      "name": "vaddvq_s8",
      "full name": "int8_t vaddvq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [8]",
      "function_cn": "[向量] 向量全部元素相加 [8]"
    },
    {
      "name": "vaddv_s16",
      "full name": "int16_t vaddv_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [16]",
      "function_cn": "[向量] 向量全部元素相加 [16]"
    },
    {
      "name": "vaddvq_s16",
      "full name": "int16_t vaddvq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [16]",
      "function_cn": "[向量] 向量全部元素相加 [16]"
    },
    {
      "name": "vaddv_s32",
      "full name": "int32_t vaddv_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_s32",
      "full name": "int32_t vaddvq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [32]",
      "function_cn": "[向量] 向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_s64",
      "full name": "int64_t vaddvq_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 向量全部元素相加 [64]"
    },
    {
      "name": "vaddv_u8",
      "full name": "uint8_t vaddv_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [8]",
      "function_cn": "[向量] 向量全部元素相加 [8]"
    },
    {
      "name": "vaddvq_u8",
      "full name": "uint8_t vaddvq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [8]",
      "function_cn": "[向量] 向量全部元素相加 [8]"
    },
    {
      "name": "vaddv_u16",
      "full name": "uint16_t vaddv_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [16]",
      "function_cn": "[向量] 向量全部元素相加 [16]"
    },
    {
      "name": "vaddvq_u16",
      "full name": "uint16_t vaddvq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [16]",
      "function_cn": "[向量] 向量全部元素相加 [16]"
    },
    {
      "name": "vaddv_u32",
      "full name": "uint32_t vaddv_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [32]",
      "function_cn": "[向量] 向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_u32",
      "full name": "uint32_t vaddvq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addv",
      "function_en": "[vector] addv [32]",
      "function_cn": "[向量] 向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_u64",
      "full name": "uint64_t vaddvq_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "addp",
      "function_en": "[vector] addp [64]",
      "function_cn": "[向量] 向量全部元素相加 [64]"
    },
    {
      "name": "vaddv_f32",
      "full name": "float32_t vaddv_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_f32",
      "full name": "float32_t vaddvq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素相加 [32]"
    },
    {
      "name": "vaddvq_f64",
      "full name": "float64_t vaddvq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "faddp",
      "function_en": "[vector] faddp [64]",
      "function_cn": "[向量] 浮点类型向量全部元素相加 [64]"
    },
    {
      "name": "vaddlv_s8",
      "full name": "int16_t vaddlv_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlv",
      "function_en": "[vector] saddlv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [8]"
    },
    {
      "name": "vaddlvq_s8",
      "full name": "int16_t vaddlvq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlv",
      "function_en": "[vector] saddlv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [8]"
    },
    {
      "name": "vaddlv_s16",
      "full name": "int32_t vaddlv_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlv",
      "function_en": "[vector] saddlv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [16]"
    },
    {
      "name": "vaddlvq_s16",
      "full name": "int32_t vaddlvq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlv",
      "function_en": "[vector] saddlv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [16]"
    },
    {
      "name": "vaddlv_s32",
      "full name": "int64_t vaddlv_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlp",
      "function_en": "[vector] saddlp [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [32]"
    },
    {
      "name": "vaddlvq_s32",
      "full name": "int64_t vaddlvq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "saddlv",
      "function_en": "[vector] saddlv [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素相加 [32]"
    },
    {
      "name": "vaddlv_u8",
      "full name": "uint16_t vaddlv_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlv",
      "function_en": "[vector] uaddlv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [8]"
    },
    {
      "name": "vaddlvq_u8",
      "full name": "uint16_t vaddlvq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlv",
      "function_en": "[vector] uaddlv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [8]"
    },
    {
      "name": "vaddlv_u16",
      "full name": "uint32_t vaddlv_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlv",
      "function_en": "[vector] uaddlv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [16]"
    },
    {
      "name": "vaddlvq_u16",
      "full name": "uint32_t vaddlvq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlv",
      "function_en": "[vector] uaddlv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [16]"
    },
    {
      "name": "vaddlv_u32",
      "full name": "uint64_t vaddlv_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlp",
      "function_en": "[vector] uaddlp [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [32]"
    },
    {
      "name": "vaddlvq_u32",
      "full name": "uint64_t vaddlvq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uaddlv",
      "function_en": "[vector] uaddlv [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素相加 [32]"
    },
    {
      "name": "vmaxv_s8",
      "full name": "int8_t vmaxv_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxv",
      "function_en": "[vector] smaxv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [8]"
    },
    {
      "name": "vmaxvq_s8",
      "full name": "int8_t vmaxvq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxv",
      "function_en": "[vector] smaxv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [8]"
    },
    {
      "name": "vmaxv_s16",
      "full name": "int16_t vmaxv_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxv",
      "function_en": "[vector] smaxv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [16]"
    },
    {
      "name": "vmaxvq_s16",
      "full name": "int16_t vmaxvq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxv",
      "function_en": "[vector] smaxv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [16]"
    },
    {
      "name": "vmaxv_s32",
      "full name": "int32_t vmaxv_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxp",
      "function_en": "[vector] smaxp [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxvq_s32",
      "full name": "int32_t vmaxvq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smaxv",
      "function_en": "[vector] smaxv [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxv_u8",
      "full name": "uint8_t vmaxv_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxv",
      "function_en": "[vector] umaxv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [8]"
    },
    {
      "name": "vmaxvq_u8",
      "full name": "uint8_t vmaxvq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxv",
      "function_en": "[vector] umaxv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [8]"
    },
    {
      "name": "vmaxv_u16",
      "full name": "uint16_t vmaxv_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxv",
      "function_en": "[vector] umaxv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [16]"
    },
    {
      "name": "vmaxvq_u16",
      "full name": "uint16_t vmaxvq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxv",
      "function_en": "[vector] umaxv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [16]"
    },
    {
      "name": "vmaxv_u32",
      "full name": "uint32_t vmaxv_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxp",
      "function_en": "[vector] umaxp [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxvq_u32",
      "full name": "uint32_t vmaxvq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umaxv",
      "function_en": "[vector] umaxv [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxv_f32",
      "full name": "float32_t vmaxv_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxvq_f32",
      "full name": "float32_t vmaxvq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxv",
      "function_en": "[vector] fmaxv [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxvq_f64",
      "full name": "float64_t vmaxvq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxp",
      "function_en": "[vector] fmaxp [64]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [64]"
    },
    {
      "name": "vminv_s8",
      "full name": "int8_t vminv_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminv",
      "function_en": "[vector] sminv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [8]"
    },
    {
      "name": "vminvq_s8",
      "full name": "int8_t vminvq_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminv",
      "function_en": "[vector] sminv [8]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [8]"
    },
    {
      "name": "vminv_s16",
      "full name": "int16_t vminv_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminv",
      "function_en": "[vector] sminv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [16]"
    },
    {
      "name": "vminvq_s16",
      "full name": "int16_t vminvq_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminv",
      "function_en": "[vector] sminv [16]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [16]"
    },
    {
      "name": "vminv_s32",
      "full name": "int32_t vminv_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminp",
      "function_en": "[vector] sminp [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminvq_s32",
      "full name": "int32_t vminvq_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sminv",
      "function_en": "[vector] sminv [32]",
      "function_cn": "[向量] 有符号整数类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminv_u8",
      "full name": "uint8_t vminv_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminv",
      "function_en": "[vector] uminv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [8]"
    },
    {
      "name": "vminvq_u8",
      "full name": "uint8_t vminvq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminv",
      "function_en": "[vector] uminv [8]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [8]"
    },
    {
      "name": "vminv_u16",
      "full name": "uint16_t vminv_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminv",
      "function_en": "[vector] uminv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [16]"
    },
    {
      "name": "vminvq_u16",
      "full name": "uint16_t vminvq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminv",
      "function_en": "[vector] uminv [16]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [16]"
    },
    {
      "name": "vminv_u32",
      "full name": "uint32_t vminv_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminp",
      "function_en": "[vector] uminp [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminvq_u32",
      "full name": "uint32_t vminvq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uminv",
      "function_en": "[vector] uminv [32]",
      "function_cn": "[向量] 无符号整数类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminv_f32",
      "full name": "float32_t vminv_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminvq_f32",
      "full name": "float32_t vminvq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminv",
      "function_en": "[vector] fminv [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminvq_f64",
      "full name": "float64_t vminvq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminp",
      "function_en": "[vector] fminp [64]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [64]"
    },
    {
      "name": "vmaxnmv_f32",
      "full name": "float32_t vmaxnmv_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxnmvq_f32",
      "full name": "float32_t vmaxnmvq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmv",
      "function_en": "[vector] fmaxnmv [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [32]"
    },
    {
      "name": "vmaxnmvq_f64",
      "full name": "float64_t vmaxnmvq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmaxnmp",
      "function_en": "[vector] fmaxnmp [64]",
      "function_cn": "[向量] 浮点类型向量全部元素取最大值 [64]"
    },
    {
      "name": "vminnmv_f32",
      "full name": "float32_t vminnmv_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminnmvq_f32",
      "full name": "float32_t vminnmvq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmv",
      "function_en": "[vector] fminnmv [32]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [32]"
    },
    {
      "name": "vminnmvq_f64",
      "full name": "float64_t vminnmvq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fminnmp",
      "function_en": "[vector] fminnmp [64]",
      "function_cn": "[向量] 浮点类型向量全部元素取最小值 [64]"
    },
    {
      "name": "vext_s8",
      "full name": "int8x8_t vext_s8(int8x8_t a, int8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vextq_s8",
      "full name": "int8x16_t vextq_s8(int8x16_t a, int8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vext_s16",
      "full name": "int16x4_t vext_s16(int16x4_t a, int16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vextq_s16",
      "full name": "int16x8_t vextq_s16(int16x8_t a, int16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vext_s32",
      "full name": "int32x2_t vext_s32(int32x2_t a, int32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vextq_s32",
      "full name": "int32x4_t vextq_s32(int32x4_t a, int32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vext_s64",
      "full name": "int64x1_t vext_s64(int64x1_t a, int64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vextq_s64",
      "full name": "int64x2_t vextq_s64(int64x2_t a, int64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vext_u8",
      "full name": "uint8x8_t vext_u8(uint8x8_t a, uint8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vextq_u8",
      "full name": "uint8x16_t vextq_u8(uint8x16_t a, uint8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vext_u16",
      "full name": "uint16x4_t vext_u16(uint16x4_t a, uint16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vextq_u16",
      "full name": "uint16x8_t vextq_u16(uint16x8_t a, uint16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vext_u32",
      "full name": "uint32x2_t vext_u32(uint32x2_t a, uint32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vextq_u32",
      "full name": "uint32x4_t vextq_u32(uint32x4_t a, uint32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vext_u64",
      "full name": "uint64x1_t vext_u64(uint64x1_t a, uint64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vextq_u64",
      "full name": "uint64x2_t vextq_u64(uint64x2_t a, uint64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vext_p64",
      "full name": "poly64x1_t vext_p64(poly64x1_t a, poly64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vextq_p64",
      "full name": "poly64x2_t vextq_p64(poly64x2_t a, poly64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vext_f32",
      "full name": "float32x2_t vext_f32(float32x2_t a, float32x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vextq_f32",
      "full name": "float32x4_t vextq_f32(float32x4_t a, float32x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [32]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [32]"
    },
    {
      "name": "vext_f64",
      "full name": "float64x1_t vext_f64(float64x1_t a, float64x1_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vextq_f64",
      "full name": "float64x2_t vextq_f64(float64x2_t a, float64x2_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [64]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [64]"
    },
    {
      "name": "vext_p8",
      "full name": "poly8x8_t vext_p8(poly8x8_t a, poly8x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vextq_p8",
      "full name": "poly8x16_t vextq_p8(poly8x16_t a, poly8x16_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [8]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [8]"
    },
    {
      "name": "vext_p16",
      "full name": "poly16x4_t vext_p16(poly16x4_t a, poly16x4_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vextq_p16",
      "full name": "poly16x8_t vextq_p16(poly16x8_t a, poly16x8_t b, const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ext",
      "function_en": "[vector] ext [16]",
      "function_cn": "[向量] 从指定位置开始提取向量元素 [16]"
    },
    {
      "name": "vrev64_s8",
      "full name": "int8x8_t vrev64_s8(int8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64q_s8",
      "full name": "int8x16_t vrev64q_s8(int8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64_s16",
      "full name": "int16x4_t vrev64_s16(int16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev64q_s16",
      "full name": "int16x8_t vrev64q_s16(int16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev64_s32",
      "full name": "int32x2_t vrev64_s32(int32x2_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [64]",
      "function_cn": "[向量] 反转向量中的元素 [64]"
    },
    {
      "name": "vrev64q_s32",
      "full name": "int32x4_t vrev64q_s32(int32x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [64]",
      "function_cn": "[向量] 反转向量中的元素 [64]"
    },
    {
      "name": "vrev64_u8",
      "full name": "uint8x8_t vrev64_u8(uint8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64q_u8",
      "full name": "uint8x16_t vrev64q_u8(uint8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64_u16",
      "full name": "uint16x4_t vrev64_u16(uint16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev64q_u16",
      "full name": "uint16x8_t vrev64q_u16(uint16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev64_u32",
      "full name": "uint32x2_t vrev64_u32(uint32x2_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev64q_u32",
      "full name": "uint32x4_t vrev64q_u32(uint32x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev64_f32",
      "full name": "float32x2_t vrev64_f32(float32x2_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev64q_f32",
      "full name": "float32x4_t vrev64q_f32(float32x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev64_p8",
      "full name": "poly8x8_t vrev64_p8(poly8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64q_p8",
      "full name": "poly8x16_t vrev64q_p8(poly8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev64_p16",
      "full name": "poly16x4_t vrev64_p16(poly16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev64q_p16",
      "full name": "poly16x8_t vrev64q_p16(poly16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev64",
      "function_en": "[vector] rev64 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev32_s8",
      "full name": "int8x8_t vrev32_s8(int8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32q_s8",
      "full name": "int8x16_t vrev32q_s8(int8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32_s16",
      "full name": "int16x4_t vrev32_s16(int16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev32q_s16",
      "full name": "int16x8_t vrev32q_s16(int16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev32_u8",
      "full name": "uint8x8_t vrev32_u8(uint8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32q_u8",
      "full name": "uint8x16_t vrev32q_u8(uint8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32_u16",
      "full name": "uint16x4_t vrev32_u16(uint16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev32q_u16",
      "full name": "uint16x8_t vrev32q_u16(uint16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [32]",
      "function_cn": "[向量] 反转向量中的元素 [32]"
    },
    {
      "name": "vrev32_p8",
      "full name": "poly8x8_t vrev32_p8(poly8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32q_p8",
      "full name": "poly8x16_t vrev32q_p8(poly8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev32_p16",
      "full name": "poly16x4_t vrev32_p16(poly16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev32q_p16",
      "full name": "poly16x8_t vrev32q_p16(poly16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev32",
      "function_en": "[vector] rev32 [16]",
      "function_cn": "[向量] 反转向量中的元素 [16]"
    },
    {
      "name": "vrev16_s8",
      "full name": "int8x8_t vrev16_s8(int8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev16q_s8",
      "full name": "int8x16_t vrev16q_s8(int8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev16_u8",
      "full name": "uint8x8_t vrev16_u8(uint8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev16q_u8",
      "full name": "uint8x16_t vrev16q_u8(uint8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev16_p8",
      "full name": "poly8x8_t vrev16_p8(poly8x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vrev16q_p8",
      "full name": "poly8x16_t vrev16q_p8(poly8x16_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "rev16",
      "function_en": "[vector] rev16 [8]",
      "function_cn": "[向量] 反转向量中的元素 [8]"
    },
    {
      "name": "vzip1_s8",
      "full name": "int8x8_t vzip1_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_unpacklo_pi8",
      "Intel Asm": "punpcklbw",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1q_s8",
      "full name": "int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_unpacklo_epi8",
      "Intel Asm": "punpcklbw",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1_s16",
      "full name": "int16x4_t vzip1_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_unpacklo_pi16",
      "Intel Asm": "punpcklwd",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip1q_s16",
      "full name": "int16x8_t vzip1q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_unpacklo_epi16",
      "Intel Asm": "punpcklwd",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip1_s32",
      "full name": "int32x2_t vzip1_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_unpacklo_pi32",
      "Intel Asm": "punpckldq",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_s32",
      "full name": "int32x4_t vzip1q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_unpacklo_epi32",
      "Intel Asm": "punpckldq",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_s64",
      "full name": "int64x2_t vzip1q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_unpacklo_epi64",
      "Intel Asm": "punpcklqdq",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [64]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [64]"
    },
    {
      "name": "vzip1_u8",
      "full name": "uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1q_u8",
      "full name": "uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1_u16",
      "full name": "uint16x4_t vzip1_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip1q_u16",
      "full name": "uint16x8_t vzip1q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip1_u32",
      "full name": "uint32x2_t vzip1_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_u32",
      "full name": "uint32x4_t vzip1q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_u64",
      "full name": "uint64x2_t vzip1q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [64]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [64]"
    },
    {
      "name": "vzip1q_p64",
      "full name": "poly64x2_t vzip1q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [64]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [64]"
    },
    {
      "name": "vzip1_f32",
      "full name": "float32x2_t vzip1_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_f32",
      "full name": "float32x4_t vzip1q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_unpacklo_ps",
      "Intel Asm": "unpcklps",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [32]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [32]"
    },
    {
      "name": "vzip1q_f64",
      "full name": "float64x2_t vzip1q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_unpacklo_pd",
      "Intel Asm": "unpcklpd",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [64]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [64]"
    },
    {
      "name": "vzip1_p8",
      "full name": "poly8x8_t vzip1_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1q_p8",
      "full name": "poly8x16_t vzip1q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [8]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [8]"
    },
    {
      "name": "vzip1_p16",
      "full name": "poly16x4_t vzip1_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip1q_p16",
      "full name": "poly16x8_t vzip1q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1",
      "function_en": "[vector] zip1 [16]",
      "function_cn": "[向量] 交错读取两个向量的低半部分元素 [16]"
    },
    {
      "name": "vzip2_s8",
      "full name": "int8x8_t vzip2_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "_mm_unpackhi_pi8",
      "Intel Asm": "punpckhbw",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2q_s8",
      "full name": "int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "_mm_unpackhi_epi8",
      "Intel Asm": "punpckhbw",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2_s16",
      "full name": "int16x4_t vzip2_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "_mm_unpackhi_pi16",
      "Intel Asm": "punpckhwd",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vzip2q_s16",
      "full name": "int16x8_t vzip2q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "_mm_unpackhi_epi16",
      "Intel Asm": "punpckhwd",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vzip2_s32",
      "full name": "int32x2_t vzip2_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "_mm_unpackhi_pi32",
      "Intel Asm": "punpckhdq",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_s32",
      "full name": "int32x4_t vzip2q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "_mm_unpackhi_epi32",
      "Intel Asm": "punpckhdq",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_s64",
      "full name": "int64x2_t vzip2q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "_mm_unpackhi_epi64",
      "Intel Asm": "punpckhqdq",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [64]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [64]"
    },
    {
      "name": "vzip2_u8",
      "full name": "uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2q_u8",
      "full name": "uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2_u16",
      "full name": "uint16x4_t vzip2_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vzip2q_u16",
      "full name": "uint16x8_t vzip2q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vzip2_u32",
      "full name": "uint32x2_t vzip2_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_u32",
      "full name": "uint32x4_t vzip2q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_u64",
      "full name": "uint64x2_t vzip2q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [64]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [64]"
    },
    {
      "name": "vzip2q_p64",
      "full name": "poly64x2_t vzip2q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [64]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [64]"
    },
    {
      "name": "vzip2_f32",
      "full name": "float32x2_t vzip2_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_f32",
      "full name": "float32x4_t vzip2q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "_mm_unpackhi_ps",
      "Intel Asm": "unpckhps",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [32]"
    },
    {
      "name": "vzip2q_f64",
      "full name": "float64x2_t vzip2q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "_mm_unpackhi_pd",
      "Intel Asm": "unpckhpd",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [64]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [64]"
    },
    {
      "name": "vzip2_p8",
      "full name": "poly8x8_t vzip2_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2q_p8",
      "full name": "poly8x16_t vzip2q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [8]"
    },
    {
      "name": "vzip2_p16",
      "full name": "poly16x4_t vzip2_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vzip2q_p16",
      "full name": "poly16x8_t vzip2q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip2",
      "function_en": "[vector] zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量的高半部分元素 [16]"
    },
    {
      "name": "vuzp1_s8",
      "full name": "int8x8_t vuzp1_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1q_s8",
      "full name": "int8x16_t vuzp1q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1_s16",
      "full name": "int16x4_t vuzp1_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp1q_s16",
      "full name": "int16x8_t vuzp1q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp1_s32",
      "full name": "int32x2_t vuzp1_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_s32",
      "full name": "int32x4_t vuzp1q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_s64",
      "full name": "int64x2_t vuzp1q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [64]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp1_u8",
      "full name": "uint8x8_t vuzp1_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1q_u8",
      "full name": "uint8x16_t vuzp1q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1_u16",
      "full name": "uint16x4_t vuzp1_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp1q_u16",
      "full name": "uint16x8_t vuzp1q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp1_u32",
      "full name": "uint32x2_t vuzp1_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_u32",
      "full name": "uint32x4_t vuzp1q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_u64",
      "full name": "uint64x2_t vuzp1q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [64]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp1q_p64",
      "full name": "poly64x2_t vuzp1q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [64]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp1_f32",
      "full name": "float32x2_t vuzp1_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_f32",
      "full name": "float32x4_t vuzp1q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [32]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp1q_f64",
      "full name": "float64x2_t vuzp1q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [64]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp1_p8",
      "full name": "poly8x8_t vuzp1_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1q_p8",
      "full name": "poly8x16_t vuzp1q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [8]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp1_p16",
      "full name": "poly16x4_t vuzp1_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp1q_p16",
      "full name": "poly16x8_t vuzp1q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1",
      "function_en": "[vector] uzp1 [16]",
      "function_cn": "[向量] 获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2_s8",
      "full name": "int8x8_t vuzp2_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2q_s8",
      "full name": "int8x16_t vuzp2q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2_s16",
      "full name": "int16x4_t vuzp2_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2q_s16",
      "full name": "int16x8_t vuzp2q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2_s32",
      "full name": "int32x2_t vuzp2_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_s32",
      "full name": "int32x4_t vuzp2q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_s64",
      "full name": "int64x2_t vuzp2q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [64]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp2_u8",
      "full name": "uint8x8_t vuzp2_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2q_u8",
      "full name": "uint8x16_t vuzp2q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2_u16",
      "full name": "uint16x4_t vuzp2_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2q_u16",
      "full name": "uint16x8_t vuzp2q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2_u32",
      "full name": "uint32x2_t vuzp2_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_u32",
      "full name": "uint32x4_t vuzp2q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_u64",
      "full name": "uint64x2_t vuzp2q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [64]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp2q_p64",
      "full name": "poly64x2_t vuzp2q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [64]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp2_f32",
      "full name": "float32x2_t vuzp2_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_f32",
      "full name": "float32x4_t vuzp2q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [32]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vuzp2q_f64",
      "full name": "float64x2_t vuzp2q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [64]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vuzp2_p8",
      "full name": "poly8x8_t vuzp2_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2q_p8",
      "full name": "poly8x16_t vuzp2q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [8]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vuzp2_p16",
      "full name": "poly16x4_t vuzp2_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vuzp2q_p16",
      "full name": "poly16x8_t vuzp2q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp2",
      "function_en": "[vector] uzp2 [16]",
      "function_cn": "[向量] 获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1_s8",
      "full name": "int8x8_t vtrn1_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1q_s8",
      "full name": "int8x16_t vtrn1q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1_s16",
      "full name": "int16x4_t vtrn1_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1q_s16",
      "full name": "int16x8_t vtrn1q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1_s32",
      "full name": "int32x2_t vtrn1_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_s32",
      "full name": "int32x4_t vtrn1q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_s64",
      "full name": "int64x2_t vtrn1q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [64]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn1_u8",
      "full name": "uint8x8_t vtrn1_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1q_u8",
      "full name": "uint8x16_t vtrn1q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1_u16",
      "full name": "uint16x4_t vtrn1_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1q_u16",
      "full name": "uint16x8_t vtrn1q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1_u32",
      "full name": "uint32x2_t vtrn1_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_u32",
      "full name": "uint32x4_t vtrn1q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_u64",
      "full name": "uint64x2_t vtrn1q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [64]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn1q_p64",
      "full name": "poly64x2_t vtrn1q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [64]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn1_f32",
      "full name": "float32x2_t vtrn1_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_f32",
      "full name": "float32x4_t vtrn1q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [32]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn1q_f64",
      "full name": "float64x2_t vtrn1q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [64]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn1_p8",
      "full name": "poly8x8_t vtrn1_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1q_p8",
      "full name": "poly8x16_t vtrn1q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [8]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn1_p16",
      "full name": "poly16x4_t vtrn1_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn1q_p16",
      "full name": "poly16x8_t vtrn1q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1",
      "function_en": "[vector] trn1 [16]",
      "function_cn": "[向量] 交错获取两个向量的偶数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2_s8",
      "full name": "int8x8_t vtrn2_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2q_s8",
      "full name": "int8x16_t vtrn2q_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2_s16",
      "full name": "int16x4_t vtrn2_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2q_s16",
      "full name": "int16x8_t vtrn2q_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2_s32",
      "full name": "int32x2_t vtrn2_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_s32",
      "full name": "int32x4_t vtrn2q_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_s64",
      "full name": "int64x2_t vtrn2q_s64(int64x2_t a, int64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [64]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn2_u8",
      "full name": "uint8x8_t vtrn2_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2q_u8",
      "full name": "uint8x16_t vtrn2q_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2_u16",
      "full name": "uint16x4_t vtrn2_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2q_u16",
      "full name": "uint16x8_t vtrn2q_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2_u32",
      "full name": "uint32x2_t vtrn2_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_u32",
      "full name": "uint32x4_t vtrn2q_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_u64",
      "full name": "uint64x2_t vtrn2q_u64(uint64x2_t a, uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [64]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn2q_p64",
      "full name": "poly64x2_t vtrn2q_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [64]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn2_f32",
      "full name": "float32x2_t vtrn2_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_f32",
      "full name": "float32x4_t vtrn2q_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [32]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn2q_f64",
      "full name": "float64x2_t vtrn2q_f64(float64x2_t a, float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [64]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [64]"
    },
    {
      "name": "vtrn2_p8",
      "full name": "poly8x8_t vtrn2_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2q_p8",
      "full name": "poly8x16_t vtrn2q_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [8]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn2_p16",
      "full name": "poly16x4_t vtrn2_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn2q_p16",
      "full name": "poly16x8_t vtrn2q_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn2",
      "function_en": "[vector] trn2 [16]",
      "function_cn": "[向量] 交错获取两个向量的奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtbl1_s8",
      "full name": "int8x8_t vtbl1_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl1_u8",
      "full name": "uint8x8_t vtbl1_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl1_p8",
      "full name": "poly8x8_t vtbl1_p8(poly8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx1_s8",
      "full name": "int8x8_t vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx1_u8",
      "full name": "uint8x8_t vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx1_p8",
      "full name": "poly8x8_t vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl2_s8",
      "full name": "int8x8_t vtbl2_s8(int8x8x2_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl2_u8",
      "full name": "uint8x8_t vtbl2_u8(uint8x8x2_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl2_p8",
      "full name": "poly8x8_t vtbl2_p8(poly8x8x2_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl3_s8",
      "full name": "int8x8_t vtbl3_s8(int8x8x3_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl3_u8",
      "full name": "uint8x8_t vtbl3_u8(uint8x8x3_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl3_p8",
      "full name": "poly8x8_t vtbl3_p8(poly8x8x3_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl4_s8",
      "full name": "int8x8_t vtbl4_s8(int8x8x4_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl4_u8",
      "full name": "uint8x8_t vtbl4_u8(uint8x8x4_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbl4_p8",
      "full name": "poly8x8_t vtbl4_p8(poly8x8x4_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx2_s8",
      "full name": "int8x8_t vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx2_u8",
      "full name": "uint8x8_t vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx2_p8",
      "full name": "poly8x8_t vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx3_s8",
      "full name": "int8x8_t vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx3_u8",
      "full name": "uint8x8_t vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx3_p8",
      "full name": "poly8x8_t vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "movi cmhs tbl bif",
      "function_en": "[vector] movi cmhs tbl bif [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx4_s8",
      "full name": "int8x8_t vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx4_u8",
      "full name": "uint8x8_t vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vtbx4_p8",
      "full name": "poly8x8_t vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1_s8",
      "full name": "int8x8_t vqtbl1_s8(int8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1q_s8",
      "full name": "int8x16_t vqtbl1q_s8(int8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1_u8",
      "full name": "uint8x8_t vqtbl1_u8(uint8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1q_u8",
      "full name": "uint8x16_t vqtbl1q_u8(uint8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1_p8",
      "full name": "poly8x8_t vqtbl1_p8(poly8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl1q_p8",
      "full name": "poly8x16_t vqtbl1q_p8(poly8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1_s8",
      "full name": "int8x8_t vqtbx1_s8(int8x8_t a, int8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1q_s8",
      "full name": "int8x16_t vqtbx1q_s8(int8x16_t a, int8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1_u8",
      "full name": "uint8x8_t vqtbx1_u8(uint8x8_t a, uint8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1q_u8",
      "full name": "uint8x16_t vqtbx1q_u8(uint8x16_t a, uint8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1_p8",
      "full name": "poly8x8_t vqtbx1_p8(poly8x8_t a, poly8x16_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx1q_p8",
      "full name": "poly8x16_t vqtbx1q_p8(poly8x16_t a, poly8x16_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2_s8",
      "full name": "int8x8_t vqtbl2_s8(int8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2q_s8",
      "full name": "int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2_u8",
      "full name": "uint8x8_t vqtbl2_u8(uint8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2q_u8",
      "full name": "uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2_p8",
      "full name": "poly8x8_t vqtbl2_p8(poly8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl2q_p8",
      "full name": "poly8x16_t vqtbl2q_p8(poly8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3_s8",
      "full name": "int8x8_t vqtbl3_s8(int8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3q_s8",
      "full name": "int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3_u8",
      "full name": "uint8x8_t vqtbl3_u8(uint8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3q_u8",
      "full name": "uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3_p8",
      "full name": "poly8x8_t vqtbl3_p8(poly8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl3q_p8",
      "full name": "poly8x16_t vqtbl3q_p8(poly8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4_s8",
      "full name": "int8x8_t vqtbl4_s8(int8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4q_s8",
      "full name": "int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4_u8",
      "full name": "uint8x8_t vqtbl4_u8(uint8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4q_u8",
      "full name": "uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4_p8",
      "full name": "poly8x8_t vqtbl4_p8(poly8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbl4q_p8",
      "full name": "poly8x16_t vqtbl4q_p8(poly8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbl",
      "function_en": "[vector] tbl [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2_s8",
      "full name": "int8x8_t vqtbx2_s8(int8x8_t a, int8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2q_s8",
      "full name": "int8x16_t vqtbx2q_s8(int8x16_t a, int8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2_u8",
      "full name": "uint8x8_t vqtbx2_u8(uint8x8_t a, uint8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2q_u8",
      "full name": "uint8x16_t vqtbx2q_u8(uint8x16_t a, uint8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2_p8",
      "full name": "poly8x8_t vqtbx2_p8(poly8x8_t a, poly8x16x2_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx2q_p8",
      "full name": "poly8x16_t vqtbx2q_p8(poly8x16_t a, poly8x16x2_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3_s8",
      "full name": "int8x8_t vqtbx3_s8(int8x8_t a, int8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3q_s8",
      "full name": "int8x16_t vqtbx3q_s8(int8x16_t a, int8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3_u8",
      "full name": "uint8x8_t vqtbx3_u8(uint8x8_t a, uint8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3q_u8",
      "full name": "uint8x16_t vqtbx3q_u8(uint8x16_t a, uint8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3_p8",
      "full name": "poly8x8_t vqtbx3_p8(poly8x8_t a, poly8x16x3_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx3q_p8",
      "full name": "poly8x16_t vqtbx3q_p8(poly8x16_t a, poly8x16x3_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4_s8",
      "full name": "int8x8_t vqtbx4_s8(int8x8_t a, int8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4q_s8",
      "full name": "int8x16_t vqtbx4q_s8(int8x16_t a, int8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4_u8",
      "full name": "uint8x8_t vqtbx4_u8(uint8x8_t a, uint8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4q_u8",
      "full name": "uint8x16_t vqtbx4q_u8(uint8x16_t a, uint8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4_p8",
      "full name": "poly8x8_t vqtbx4_p8(poly8x8_t a, poly8x16x4_t t, uint8x8_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vqtbx4q_p8",
      "full name": "poly8x16_t vqtbx4q_p8(poly8x16_t a, poly8x16x4_t t, uint8x16_t idx)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "tbx",
      "function_en": "[vector] tbx [8]",
      "function_cn": "[向量] 表向量查找 [8]"
    },
    {
      "name": "vget_lane_u8",
      "full name": "uint8_t vget_lane_u8(uint8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [8]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [8]"
    },
    {
      "name": "vget_lane_u16",
      "full name": "uint16_t vget_lane_u16(uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [16]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [16]"
    },
    {
      "name": "vget_lane_u32",
      "full name": "uint32_t vget_lane_u32(uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [32]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [32]"
    },
    {
      "name": "vget_lane_u64",
      "full name": "uint64_t vget_lane_u64(uint64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [64]"
    },
    {
      "name": "vget_lane_p64",
      "full name": "poly64_t vget_lane_p64(poly64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 提取向量指定元素 [64]"
    },
    {
      "name": "vget_lane_s8",
      "full name": "int8_t vget_lane_s8(int8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [8]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [8]"
    },
    {
      "name": "vget_lane_s16",
      "full name": "int16_t vget_lane_s16(int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [16]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [16]"
    },
    {
      "name": "vget_lane_s32",
      "full name": "int32_t vget_lane_s32(int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [32]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [32]"
    },
    {
      "name": "vget_lane_s64",
      "full name": "int64_t vget_lane_s64(int64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [64]"
    },
    {
      "name": "vget_lane_p8",
      "full name": "poly8_t vget_lane_p8(poly8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [8]",
      "function_cn": "[向量] 提取向量指定元素 [8]"
    },
    {
      "name": "vget_lane_p16",
      "full name": "poly16_t vget_lane_p16(poly16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [16]",
      "function_cn": "[向量] 提取向量指定元素 [16]"
    },
    {
      "name": "vget_lane_f32",
      "full name": "float32_t vget_lane_f32(float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [32]"
    },
    {
      "name": "vget_lane_f64",
      "full name": "float64_t vget_lane_f64(float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [64]"
    },
    {
      "name": "vgetq_lane_u8",
      "full name": "uint8_t vgetq_lane_u8(uint8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [8]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [8]"
    },
    {
      "name": "vgetq_lane_u16",
      "full name": "uint16_t vgetq_lane_u16(uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [16]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [16]"
    },
    {
      "name": "vgetq_lane_u32",
      "full name": "uint32_t vgetq_lane_u32(uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [32]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [32]"
    },
    {
      "name": "vgetq_lane_u64",
      "full name": "uint64_t vgetq_lane_u64(uint64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 无符号整数类型提取向量指定元素 [64]"
    },
    {
      "name": "vgetq_lane_p64",
      "full name": "poly64_t vgetq_lane_p64(poly64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 提取向量指定元素 [64]"
    },
    {
      "name": "vgetq_lane_s8",
      "full name": "int8_t vgetq_lane_s8(int8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [8]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [8]"
    },
    {
      "name": "vgetq_lane_s16",
      "full name": "int16_t vgetq_lane_s16(int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [16]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [16]"
    },
    {
      "name": "vgetq_lane_s32",
      "full name": "int32_t vgetq_lane_s32(int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "smov",
      "function_en": "[vector] smov [32]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [32]"
    },
    {
      "name": "vgetq_lane_s64",
      "full name": "int64_t vgetq_lane_s64(int64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [64]",
      "function_cn": "[向量] 有符号整数类型提取向量指定元素 [64]"
    },
    {
      "name": "vgetq_lane_p8",
      "full name": "poly8_t vgetq_lane_p8(poly8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [8]",
      "function_cn": "[向量] 提取向量指定元素 [8]"
    },
    {
      "name": "vgetq_lane_p16",
      "full name": "poly16_t vgetq_lane_p16(poly16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "umov",
      "function_en": "[vector] umov [16]",
      "function_cn": "[向量] 提取向量指定元素 [16]"
    },
    {
      "name": "vget_lane_f16",
      "full name": "float16_t vget_lane_f16(float16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [16]"
    },
    {
      "name": "vgetq_lane_f16",
      "full name": "float16_t vgetq_lane_f16(float16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [16]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [16]"
    },
    {
      "name": "vgetq_lane_f32",
      "full name": "float32_t vgetq_lane_f32(float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [32]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [32]"
    },
    {
      "name": "vgetq_lane_f64",
      "full name": "float64_t vgetq_lane_f64(float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "dup",
      "function_en": "[vector] dup [64]",
      "function_cn": "[向量] 浮点类型提取向量指定元素 [64]"
    },
    {
      "name": "vset_lane_u8",
      "full name": "uint8x8_t vset_lane_u8(uint8_t a, uint8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vset_lane_u16",
      "full name": "uint16x4_t vset_lane_u16(uint16_t a, uint16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vset_lane_u32",
      "full name": "uint32x2_t vset_lane_u32(uint32_t a, uint32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vset_lane_u64",
      "full name": "uint64x1_t vset_lane_u64(uint64_t a, uint64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vset_lane_p64",
      "full name": "poly64x1_t vset_lane_p64(poly64_t a, poly64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vset_lane_s8",
      "full name": "int8x8_t vset_lane_s8(int8_t a, int8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vset_lane_s16",
      "full name": "int16x4_t vset_lane_s16(int16_t a, int16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vset_lane_s32",
      "full name": "int32x2_t vset_lane_s32(int32_t a, int32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vset_lane_s64",
      "full name": "int64x1_t vset_lane_s64(int64_t a, int64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vset_lane_p8",
      "full name": "poly8x8_t vset_lane_p8(poly8_t a, poly8x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vset_lane_p16",
      "full name": "poly16x4_t vset_lane_p16(poly16_t a, poly16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vset_lane_f16",
      "full name": "float16x4_t vset_lane_f16(float16_t a, float16x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vsetq_lane_f16",
      "full name": "float16x8_t vsetq_lane_f16(float16_t a, float16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vset_lane_f32",
      "full name": "float32x2_t vset_lane_f32(float32_t a, float32x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vset_lane_f64",
      "full name": "float64x1_t vset_lane_f64(float64_t a, float64x1_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vsetq_lane_u8",
      "full name": "uint8x16_t vsetq_lane_u8(uint8_t a, uint8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vsetq_lane_u16",
      "full name": "uint16x8_t vsetq_lane_u16(uint16_t a, uint16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vsetq_lane_u32",
      "full name": "uint32x4_t vsetq_lane_u32(uint32_t a, uint32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vsetq_lane_u64",
      "full name": "uint64x2_t vsetq_lane_u64(uint64_t a, uint64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vsetq_lane_p64",
      "full name": "poly64x2_t vsetq_lane_p64(poly64_t a, poly64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vsetq_lane_s8",
      "full name": "int8x16_t vsetq_lane_s8(int8_t a, int8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vsetq_lane_s16",
      "full name": "int16x8_t vsetq_lane_s16(int16_t a, int16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vsetq_lane_s32",
      "full name": "int32x4_t vsetq_lane_s32(int32_t a, int32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vsetq_lane_s64",
      "full name": "int64x2_t vsetq_lane_s64(int64_t a, int64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vsetq_lane_p8",
      "full name": "poly8x16_t vsetq_lane_p8(poly8_t a, poly8x16_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [8]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [8]"
    },
    {
      "name": "vsetq_lane_p16",
      "full name": "poly16x8_t vsetq_lane_p16(poly16_t a, poly16x8_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [16]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [16]"
    },
    {
      "name": "vsetq_lane_f32",
      "full name": "float32x4_t vsetq_lane_f32(float32_t a, float32x4_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [32]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [32]"
    },
    {
      "name": "vsetq_lane_f64",
      "full name": "float64x2_t vsetq_lane_f64(float64_t a, float64x2_t v, const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ins",
      "function_en": "[vector] ins [64]",
      "function_cn": "[向量] 将一个数拷贝到向量中指定的位置 [64]"
    },
    {
      "name": "vrecpxs_f32",
      "full name": "float32_t vrecpxs_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpx",
      "function_en": "[vector] frecpx [32]",
      "function_cn": "[向量] 浮点倒数指数 [32]"
    },
    {
      "name": "vrecpxd_f64",
      "full name": "float64_t vrecpxd_f64(float64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "frecpx",
      "function_en": "[vector] frecpx [64]",
      "function_cn": "[向量] 浮点倒数指数 [64]"
    },
    {
      "name": "vfma_n_f32",
      "full name": "float32x2_t vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 浮点类型乘加 [32]"
    },
    {
      "name": "vfmaq_n_f32",
      "full name": "float32x4_t vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [32]",
      "function_cn": "[向量] 浮点类型乘加 [32]"
    },
    {
      "name": "vfms_n_f32",
      "full name": "float32x2_t vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点类型乘减 [32]"
    },
    {
      "name": "vfmsq_n_f32",
      "full name": "float32x4_t vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [32]",
      "function_cn": "[向量] 浮点类型乘减 [32]"
    },
    {
      "name": "vfma_n_f64",
      "full name": "float64x1_t vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmadd",
      "function_en": "[vector] fmadd [64]",
      "function_cn": "[向量] 浮点类型乘加 [64]"
    },
    {
      "name": "vfmaq_n_f64",
      "full name": "float64x2_t vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmla",
      "function_en": "[vector] fmla [64]",
      "function_cn": "[向量] 浮点类型乘加 [64]"
    },
    {
      "name": "vfms_n_f64",
      "full name": "float64x1_t vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmsub",
      "function_en": "[vector] fmsub [64]",
      "function_cn": "[向量] 浮点类型乘减 [64]"
    },
    {
      "name": "vfmsq_n_f64",
      "full name": "float64x2_t vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "fmls",
      "function_en": "[vector] fmls [64]",
      "function_cn": "[向量] 浮点类型乘减 [64]"
    },
    {
      "name": "vtrn_s8",
      "full name": "int8x8x2_t vtrn_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn_s16",
      "full name": "int16x4x2_t vtrn_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn_u8",
      "full name": "uint8x8x2_t vtrn_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn_u16",
      "full name": "uint16x4x2_t vtrn_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn_p8",
      "full name": "poly8x8x2_t vtrn_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrn_p16",
      "full name": "poly16x4x2_t vtrn_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrn_s32",
      "full name": "int32x2x2_t vtrn_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn_f32",
      "full name": "float32x2x2_t vtrn_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrn_u32",
      "full name": "uint32x2x2_t vtrn_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrnq_s8",
      "full name": "int8x16x2_t vtrnq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrnq_s16",
      "full name": "int16x8x2_t vtrnq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrnq_s32",
      "full name": "int32x4x2_t vtrnq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrnq_f32",
      "full name": "float32x4x2_t vtrnq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrnq_u8",
      "full name": "uint8x16x2_t vtrnq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrnq_u16",
      "full name": "uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vtrnq_u32",
      "full name": "uint32x4x2_t vtrnq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [32]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [32]"
    },
    {
      "name": "vtrnq_p8",
      "full name": "poly8x16x2_t vtrnq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [8]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [8]"
    },
    {
      "name": "vtrnq_p16",
      "full name": "poly16x8x2_t vtrnq_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "trn1 trn2",
      "function_en": "[vector] trn1 trn2 [16]",
      "function_cn": "[向量] 交错获取偶数索引元素，再交错获取奇数索引元素(索引从0开始) [16]"
    },
    {
      "name": "vzip_s8",
      "full name": "int8x8x2_t vzip_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzip_s16",
      "full name": "int16x4x2_t vzip_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vzip_u8",
      "full name": "uint8x8x2_t vzip_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzip_u16",
      "full name": "uint16x4x2_t vzip_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vzip_p8",
      "full name": "poly8x8x2_t vzip_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzip_p16",
      "full name": "poly16x4x2_t vzip_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vzip_s32",
      "full name": "int32x2x2_t vzip_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzip_f32",
      "full name": "float32x2x2_t vzip_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzip_u32",
      "full name": "uint32x2x2_t vzip_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzipq_s8",
      "full name": "int8x16x2_t vzipq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzipq_s16",
      "full name": "int16x8x2_t vzipq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vzipq_s32",
      "full name": "int32x4x2_t vzipq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzipq_f32",
      "full name": "float32x4x2_t vzipq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzipq_u8",
      "full name": "uint8x16x2_t vzipq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzipq_u16",
      "full name": "uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vzipq_u32",
      "full name": "uint32x4x2_t vzipq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [32]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [32]"
    },
    {
      "name": "vzipq_p8",
      "full name": "poly8x16x2_t vzipq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [8]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [8]"
    },
    {
      "name": "vzipq_p16",
      "full name": "poly16x8x2_t vzipq_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "zip1 zip2",
      "function_en": "[vector] zip1 zip2 [16]",
      "function_cn": "[向量] 交错读取两个向量中的元素 [16]"
    },
    {
      "name": "vuzp_s8",
      "full name": "int8x8x2_t vuzp_s8(int8x8_t a, int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzp_s16",
      "full name": "int16x4x2_t vuzp_s16(int16x4_t a, int16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vuzp_s32",
      "full name": "int32x2x2_t vuzp_s32(int32x2_t a, int32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzp_f32",
      "full name": "float32x2x2_t vuzp_f32(float32x2_t a, float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzp_u8",
      "full name": "uint8x8x2_t vuzp_u8(uint8x8_t a, uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzp_u16",
      "full name": "uint16x4x2_t vuzp_u16(uint16x4_t a, uint16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vuzp_u32",
      "full name": "uint32x2x2_t vuzp_u32(uint32x2_t a, uint32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzp_p8",
      "full name": "poly8x8x2_t vuzp_p8(poly8x8_t a, poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzp_p16",
      "full name": "poly16x4x2_t vuzp_p16(poly16x4_t a, poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vuzpq_s8",
      "full name": "int8x16x2_t vuzpq_s8(int8x16_t a, int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzpq_s16",
      "full name": "int16x8x2_t vuzpq_s16(int16x8_t a, int16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vuzpq_s32",
      "full name": "int32x4x2_t vuzpq_s32(int32x4_t a, int32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzpq_f32",
      "full name": "float32x4x2_t vuzpq_f32(float32x4_t a, float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzpq_u8",
      "full name": "uint8x16x2_t vuzpq_u8(uint8x16_t a, uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzpq_u16",
      "full name": "uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vuzpq_u32",
      "full name": "uint32x4x2_t vuzpq_u32(uint32x4_t a, uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [32]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [32]"
    },
    {
      "name": "vuzpq_p8",
      "full name": "poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [8]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [8]"
    },
    {
      "name": "vuzpq_p16",
      "full name": "poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "uzp1 uzp2",
      "function_en": "[vector] uzp1 uzp2 [16]",
      "function_cn": "[向量] 读取两个向量中的偶数索引再读取奇数索引(索引从0开始) [16]"
    },
    {
      "name": "vreinterpret_s16_s8",
      "full name": "int16x4_t vreinterpret_s16_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP [8]",
      "function_cn": "[向量] 空操作 [8]"
    },
    {
      "name": "vreinterpret_s32_s8",
      "full name": "int32x2_t vreinterpret_s32_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f32_s8",
      "full name": "float32x2_t vreinterpret_f32_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u8_s8",
      "full name": "uint8x8_t vreinterpret_u8_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u16_s8",
      "full name": "uint16x4_t vreinterpret_u16_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u32_s8",
      "full name": "uint32x2_t vreinterpret_u32_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p8_s8",
      "full name": "poly8x8_t vreinterpret_p8_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p16_s8",
      "full name": "poly16x4_t vreinterpret_p16_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u64_s8",
      "full name": "uint64x1_t vreinterpret_u64_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s64_s8",
      "full name": "int64x1_t vreinterpret_s64_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f64_s8",
      "full name": "float64x1_t vreinterpret_f64_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p64_s8",
      "full name": "poly64x1_t vreinterpret_p64_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f16_s8",
      "full name": "float16x4_t vreinterpret_f16_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s8_s16",
      "full name": "int8x8_t vreinterpret_s8_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s32_s16",
      "full name": "int32x2_t vreinterpret_s32_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f32_s16",
      "full name": "float32x2_t vreinterpret_f32_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u8_s16",
      "full name": "uint8x8_t vreinterpret_u8_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u16_s16",
      "full name": "uint16x4_t vreinterpret_u16_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u32_s16",
      "full name": "uint32x2_t vreinterpret_u32_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p8_s16",
      "full name": "poly8x8_t vreinterpret_p8_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p16_s16",
      "full name": "poly16x4_t vreinterpret_p16_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u64_s16",
      "full name": "uint64x1_t vreinterpret_u64_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s64_s16",
      "full name": "int64x1_t vreinterpret_s64_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f64_s16",
      "full name": "float64x1_t vreinterpret_f64_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p64_s16",
      "full name": "poly64x1_t vreinterpret_p64_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f16_s16",
      "full name": "float16x4_t vreinterpret_f16_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s8_s32",
      "full name": "int8x8_t vreinterpret_s8_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s16_s32",
      "full name": "int16x4_t vreinterpret_s16_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f32_s32",
      "full name": "float32x2_t vreinterpret_f32_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u8_s32",
      "full name": "uint8x8_t vreinterpret_u8_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u16_s32",
      "full name": "uint16x4_t vreinterpret_u16_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u32_s32",
      "full name": "uint32x2_t vreinterpret_u32_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p8_s32",
      "full name": "poly8x8_t vreinterpret_p8_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p16_s32",
      "full name": "poly16x4_t vreinterpret_p16_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u64_s32",
      "full name": "uint64x1_t vreinterpret_u64_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s64_s32",
      "full name": "int64x1_t vreinterpret_s64_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f64_s32",
      "full name": "float64x1_t vreinterpret_f64_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p64_s32",
      "full name": "poly64x1_t vreinterpret_p64_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f16_s32",
      "full name": "float16x4_t vreinterpret_f16_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s8_f32",
      "full name": "int8x8_t vreinterpret_s8_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s16_f32",
      "full name": "int16x4_t vreinterpret_s16_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s32_f32",
      "full name": "int32x2_t vreinterpret_s32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u8_f32",
      "full name": "uint8x8_t vreinterpret_u8_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u16_f32",
      "full name": "uint16x4_t vreinterpret_u16_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u32_f32",
      "full name": "uint32x2_t vreinterpret_u32_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p8_f32",
      "full name": "poly8x8_t vreinterpret_p8_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p16_f32",
      "full name": "poly16x4_t vreinterpret_p16_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u64_f32",
      "full name": "uint64x1_t vreinterpret_u64_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s64_f32",
      "full name": "int64x1_t vreinterpret_s64_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f64_f32",
      "full name": "float64x1_t vreinterpret_f64_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p64_f32",
      "full name": "poly64x1_t vreinterpret_p64_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p64_f64",
      "full name": "poly64x1_t vreinterpret_p64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f16_f32",
      "full name": "float16x4_t vreinterpret_f16_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s8_u8",
      "full name": "int8x8_t vreinterpret_s8_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s16_u8",
      "full name": "int16x4_t vreinterpret_s16_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s32_u8",
      "full name": "int32x2_t vreinterpret_s32_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f32_u8",
      "full name": "float32x2_t vreinterpret_f32_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u16_u8",
      "full name": "uint16x4_t vreinterpret_u16_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u32_u8",
      "full name": "uint32x2_t vreinterpret_u32_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p8_u8",
      "full name": "poly8x8_t vreinterpret_p8_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p16_u8",
      "full name": "poly16x4_t vreinterpret_p16_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u64_u8",
      "full name": "uint64x1_t vreinterpret_u64_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s64_u8",
      "full name": "int64x1_t vreinterpret_s64_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f64_u8",
      "full name": "float64x1_t vreinterpret_f64_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p64_u8",
      "full name": "poly64x1_t vreinterpret_p64_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f16_u8",
      "full name": "float16x4_t vreinterpret_f16_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s8_u16",
      "full name": "int8x8_t vreinterpret_s8_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s16_u16",
      "full name": "int16x4_t vreinterpret_s16_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s32_u16",
      "full name": "int32x2_t vreinterpret_s32_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f32_u16",
      "full name": "float32x2_t vreinterpret_f32_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u8_u16",
      "full name": "uint8x8_t vreinterpret_u8_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u32_u16",
      "full name": "uint32x2_t vreinterpret_u32_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p8_u16",
      "full name": "poly8x8_t vreinterpret_p8_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p16_u16",
      "full name": "poly16x4_t vreinterpret_p16_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u64_u16",
      "full name": "uint64x1_t vreinterpret_u64_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s64_u16",
      "full name": "int64x1_t vreinterpret_s64_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f64_u16",
      "full name": "float64x1_t vreinterpret_f64_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p64_u16",
      "full name": "poly64x1_t vreinterpret_p64_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f16_u16",
      "full name": "float16x4_t vreinterpret_f16_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s8_u32",
      "full name": "int8x8_t vreinterpret_s8_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s16_u32",
      "full name": "int16x4_t vreinterpret_s16_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s32_u32",
      "full name": "int32x2_t vreinterpret_s32_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f32_u32",
      "full name": "float32x2_t vreinterpret_f32_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u8_u32",
      "full name": "uint8x8_t vreinterpret_u8_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u16_u32",
      "full name": "uint16x4_t vreinterpret_u16_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p8_u32",
      "full name": "poly8x8_t vreinterpret_p8_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p16_u32",
      "full name": "poly16x4_t vreinterpret_p16_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_u64_u32",
      "full name": "uint64x1_t vreinterpret_u64_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s64_u32",
      "full name": "int64x1_t vreinterpret_s64_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f64_u32",
      "full name": "float64x1_t vreinterpret_f64_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_p64_u32",
      "full name": "poly64x1_t vreinterpret_p64_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_f16_u32",
      "full name": "float16x4_t vreinterpret_f16_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpret_s8_p8",
      "full name": "int8x8_t vreinterpret_s8_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s16_p8",
      "full name": "int16x4_t vreinterpret_s16_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s32_p8",
      "full name": "int32x2_t vreinterpret_s32_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f32_p8",
      "full name": "float32x2_t vreinterpret_f32_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u8_p8",
      "full name": "uint8x8_t vreinterpret_u8_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u16_p8",
      "full name": "uint16x4_t vreinterpret_u16_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u32_p8",
      "full name": "uint32x2_t vreinterpret_u32_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p16_p8",
      "full name": "poly16x4_t vreinterpret_p16_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_u64_p8",
      "full name": "uint64x1_t vreinterpret_u64_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s64_p8",
      "full name": "int64x1_t vreinterpret_s64_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f64_p8",
      "full name": "float64x1_t vreinterpret_f64_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_p64_p8",
      "full name": "poly64x1_t vreinterpret_p64_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_f16_p8",
      "full name": "float16x4_t vreinterpret_f16_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpret_s8_p16",
      "full name": "int8x8_t vreinterpret_s8_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s16_p16",
      "full name": "int16x4_t vreinterpret_s16_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s32_p16",
      "full name": "int32x2_t vreinterpret_s32_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f32_p16",
      "full name": "float32x2_t vreinterpret_f32_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u8_p16",
      "full name": "uint8x8_t vreinterpret_u8_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u16_p16",
      "full name": "uint16x4_t vreinterpret_u16_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u32_p16",
      "full name": "uint32x2_t vreinterpret_u32_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p8_p16",
      "full name": "poly8x8_t vreinterpret_p8_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u64_p16",
      "full name": "uint64x1_t vreinterpret_u64_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s64_p16",
      "full name": "int64x1_t vreinterpret_s64_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f64_p16",
      "full name": "float64x1_t vreinterpret_f64_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p64_p16",
      "full name": "poly64x1_t vreinterpret_p64_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f16_p16",
      "full name": "float16x4_t vreinterpret_f16_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s8_u64",
      "full name": "int8x8_t vreinterpret_s8_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s16_u64",
      "full name": "int16x4_t vreinterpret_s16_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s32_u64",
      "full name": "int32x2_t vreinterpret_s32_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f32_u64",
      "full name": "float32x2_t vreinterpret_f32_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u8_u64",
      "full name": "uint8x8_t vreinterpret_u8_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u16_u64",
      "full name": "uint16x4_t vreinterpret_u16_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u32_u64",
      "full name": "uint32x2_t vreinterpret_u32_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p8_u64",
      "full name": "poly8x8_t vreinterpret_p8_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p16_u64",
      "full name": "poly16x4_t vreinterpret_p16_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s64_u64",
      "full name": "int64x1_t vreinterpret_s64_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f64_u64",
      "full name": "float64x1_t vreinterpret_f64_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p64_u64",
      "full name": "poly64x1_t vreinterpret_p64_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f16_u64",
      "full name": "float16x4_t vreinterpret_f16_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s8_s64",
      "full name": "int8x8_t vreinterpret_s8_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s16_s64",
      "full name": "int16x4_t vreinterpret_s16_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s32_s64",
      "full name": "int32x2_t vreinterpret_s32_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f32_s64",
      "full name": "float32x2_t vreinterpret_f32_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u8_s64",
      "full name": "uint8x8_t vreinterpret_u8_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u16_s64",
      "full name": "uint16x4_t vreinterpret_u16_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u32_s64",
      "full name": "uint32x2_t vreinterpret_u32_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p8_s64",
      "full name": "poly8x8_t vreinterpret_p8_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p16_s64",
      "full name": "poly16x4_t vreinterpret_p16_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u64_s64",
      "full name": "uint64x1_t vreinterpret_u64_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f64_s64",
      "full name": "float64x1_t vreinterpret_f64_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f16_s64",
      "full name": "float16x4_t vreinterpret_f16_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s8_f16",
      "full name": "int8x8_t vreinterpret_s8_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s16_f16",
      "full name": "int16x4_t vreinterpret_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s32_f16",
      "full name": "int32x2_t vreinterpret_s32_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f32_f16",
      "full name": "float32x2_t vreinterpret_f32_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u8_f16",
      "full name": "uint8x8_t vreinterpret_u8_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u16_f16",
      "full name": "uint16x4_t vreinterpret_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u32_f16",
      "full name": "uint32x2_t vreinterpret_u32_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p8_f16",
      "full name": "poly8x8_t vreinterpret_p8_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p16_f16",
      "full name": "poly16x4_t vreinterpret_p16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_u64_f16",
      "full name": "uint64x1_t vreinterpret_u64_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s64_f16",
      "full name": "int64x1_t vreinterpret_s64_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_f64_f16",
      "full name": "float64x1_t vreinterpret_f64_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_p64_f16",
      "full name": "poly64x1_t vreinterpret_p64_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s16_s8",
      "full name": "int16x8_t vreinterpretq_s16_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s32_s8",
      "full name": "int32x4_t vreinterpretq_s32_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f32_s8",
      "full name": "float32x4_t vreinterpretq_f32_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u8_s8",
      "full name": "uint8x16_t vreinterpretq_u8_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u16_s8",
      "full name": "uint16x8_t vreinterpretq_u16_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u32_s8",
      "full name": "uint32x4_t vreinterpretq_u32_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p8_s8",
      "full name": "poly8x16_t vreinterpretq_p8_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p16_s8",
      "full name": "poly16x8_t vreinterpretq_p16_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u64_s8",
      "full name": "uint64x2_t vreinterpretq_u64_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s64_s8",
      "full name": "int64x2_t vreinterpretq_s64_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f64_s8",
      "full name": "float64x2_t vreinterpretq_f64_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p64_s8",
      "full name": "poly64x2_t vreinterpretq_p64_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p128_s8",
      "full name": "poly128_t vreinterpretq_p128_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f16_s8",
      "full name": "float16x8_t vreinterpretq_f16_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s8_s16",
      "full name": "int8x16_t vreinterpretq_s8_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s32_s16",
      "full name": "int32x4_t vreinterpretq_s32_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f32_s16",
      "full name": "float32x4_t vreinterpretq_f32_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u8_s16",
      "full name": "uint8x16_t vreinterpretq_u8_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u16_s16",
      "full name": "uint16x8_t vreinterpretq_u16_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u32_s16",
      "full name": "uint32x4_t vreinterpretq_u32_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p8_s16",
      "full name": "poly8x16_t vreinterpretq_p8_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p16_s16",
      "full name": "poly16x8_t vreinterpretq_p16_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u64_s16",
      "full name": "uint64x2_t vreinterpretq_u64_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s64_s16",
      "full name": "int64x2_t vreinterpretq_s64_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f64_s16",
      "full name": "float64x2_t vreinterpretq_f64_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p64_s16",
      "full name": "poly64x2_t vreinterpretq_p64_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p128_s16",
      "full name": "poly128_t vreinterpretq_p128_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f16_s16",
      "full name": "float16x8_t vreinterpretq_f16_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s8_s32",
      "full name": "int8x16_t vreinterpretq_s8_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s16_s32",
      "full name": "int16x8_t vreinterpretq_s16_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f32_s32",
      "full name": "float32x4_t vreinterpretq_f32_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u8_s32",
      "full name": "uint8x16_t vreinterpretq_u8_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u16_s32",
      "full name": "uint16x8_t vreinterpretq_u16_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u32_s32",
      "full name": "uint32x4_t vreinterpretq_u32_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p8_s32",
      "full name": "poly8x16_t vreinterpretq_p8_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p16_s32",
      "full name": "poly16x8_t vreinterpretq_p16_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u64_s32",
      "full name": "uint64x2_t vreinterpretq_u64_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s64_s32",
      "full name": "int64x2_t vreinterpretq_s64_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f64_s32",
      "full name": "float64x2_t vreinterpretq_f64_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p64_s32",
      "full name": "poly64x2_t vreinterpretq_p64_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p128_s32",
      "full name": "poly128_t vreinterpretq_p128_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f16_s32",
      "full name": "float16x8_t vreinterpretq_f16_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s8_f32",
      "full name": "int8x16_t vreinterpretq_s8_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s16_f32",
      "full name": "int16x8_t vreinterpretq_s16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s32_f32",
      "full name": "int32x4_t vreinterpretq_s32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u8_f32",
      "full name": "uint8x16_t vreinterpretq_u8_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u16_f32",
      "full name": "uint16x8_t vreinterpretq_u16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u32_f32",
      "full name": "uint32x4_t vreinterpretq_u32_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p8_f32",
      "full name": "poly8x16_t vreinterpretq_p8_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p16_f32",
      "full name": "poly16x8_t vreinterpretq_p16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u64_f32",
      "full name": "uint64x2_t vreinterpretq_u64_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s64_f32",
      "full name": "int64x2_t vreinterpretq_s64_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f64_f32",
      "full name": "float64x2_t vreinterpretq_f64_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p64_f32",
      "full name": "poly64x2_t vreinterpretq_p64_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p128_f32",
      "full name": "poly128_t vreinterpretq_p128_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p64_f64",
      "full name": "poly64x2_t vreinterpretq_p64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p128_f64",
      "full name": "poly128_t vreinterpretq_p128_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f16_f32",
      "full name": "float16x8_t vreinterpretq_f16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s8_u8",
      "full name": "int8x16_t vreinterpretq_s8_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s16_u8",
      "full name": "int16x8_t vreinterpretq_s16_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s32_u8",
      "full name": "int32x4_t vreinterpretq_s32_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f32_u8",
      "full name": "float32x4_t vreinterpretq_f32_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u16_u8",
      "full name": "uint16x8_t vreinterpretq_u16_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u32_u8",
      "full name": "uint32x4_t vreinterpretq_u32_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p8_u8",
      "full name": "poly8x16_t vreinterpretq_p8_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p16_u8",
      "full name": "poly16x8_t vreinterpretq_p16_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u64_u8",
      "full name": "uint64x2_t vreinterpretq_u64_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s64_u8",
      "full name": "int64x2_t vreinterpretq_s64_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f64_u8",
      "full name": "float64x2_t vreinterpretq_f64_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p64_u8",
      "full name": "poly64x2_t vreinterpretq_p64_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p128_u8",
      "full name": "poly128_t vreinterpretq_p128_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f16_u8",
      "full name": "float16x8_t vreinterpretq_f16_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s8_u16",
      "full name": "int8x16_t vreinterpretq_s8_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s16_u16",
      "full name": "int16x8_t vreinterpretq_s16_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s32_u16",
      "full name": "int32x4_t vreinterpretq_s32_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f32_u16",
      "full name": "float32x4_t vreinterpretq_f32_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u8_u16",
      "full name": "uint8x16_t vreinterpretq_u8_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u32_u16",
      "full name": "uint32x4_t vreinterpretq_u32_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p8_u16",
      "full name": "poly8x16_t vreinterpretq_p8_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p16_u16",
      "full name": "poly16x8_t vreinterpretq_p16_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u64_u16",
      "full name": "uint64x2_t vreinterpretq_u64_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s64_u16",
      "full name": "int64x2_t vreinterpretq_s64_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f64_u16",
      "full name": "float64x2_t vreinterpretq_f64_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p64_u16",
      "full name": "poly64x2_t vreinterpretq_p64_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p128_u16",
      "full name": "poly128_t vreinterpretq_p128_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f16_u16",
      "full name": "float16x8_t vreinterpretq_f16_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s8_u32",
      "full name": "int8x16_t vreinterpretq_s8_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s16_u32",
      "full name": "int16x8_t vreinterpretq_s16_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s32_u32",
      "full name": "int32x4_t vreinterpretq_s32_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f32_u32",
      "full name": "float32x4_t vreinterpretq_f32_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u8_u32",
      "full name": "uint8x16_t vreinterpretq_u8_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u16_u32",
      "full name": "uint16x8_t vreinterpretq_u16_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p8_u32",
      "full name": "poly8x16_t vreinterpretq_p8_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p16_u32",
      "full name": "poly16x8_t vreinterpretq_p16_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_u64_u32",
      "full name": "uint64x2_t vreinterpretq_u64_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s64_u32",
      "full name": "int64x2_t vreinterpretq_s64_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f64_u32",
      "full name": "float64x2_t vreinterpretq_f64_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p64_u32",
      "full name": "poly64x2_t vreinterpretq_p64_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_p128_u32",
      "full name": "poly128_t vreinterpretq_p128_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_f16_u32",
      "full name": "float16x8_t vreinterpretq_f16_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [32]",
      "function_cn": "[向量] 空操作  [32]"
    },
    {
      "name": "vreinterpretq_s8_p8",
      "full name": "int8x16_t vreinterpretq_s8_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s16_p8",
      "full name": "int16x8_t vreinterpretq_s16_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s32_p8",
      "full name": "int32x4_t vreinterpretq_s32_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f32_p8",
      "full name": "float32x4_t vreinterpretq_f32_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u8_p8",
      "full name": "uint8x16_t vreinterpretq_u8_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u16_p8",
      "full name": "uint16x8_t vreinterpretq_u16_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u32_p8",
      "full name": "uint32x4_t vreinterpretq_u32_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p16_p8",
      "full name": "poly16x8_t vreinterpretq_p16_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_u64_p8",
      "full name": "uint64x2_t vreinterpretq_u64_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s64_p8",
      "full name": "int64x2_t vreinterpretq_s64_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f64_p8",
      "full name": "float64x2_t vreinterpretq_f64_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p64_p8",
      "full name": "poly64x2_t vreinterpretq_p64_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_p128_p8",
      "full name": "poly128_t vreinterpretq_p128_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_f16_p8",
      "full name": "float16x8_t vreinterpretq_f16_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [8]",
      "function_cn": "[向量] 空操作  [8]"
    },
    {
      "name": "vreinterpretq_s8_p16",
      "full name": "int8x16_t vreinterpretq_s8_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s16_p16",
      "full name": "int16x8_t vreinterpretq_s16_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s32_p16",
      "full name": "int32x4_t vreinterpretq_s32_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f32_p16",
      "full name": "float32x4_t vreinterpretq_f32_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u8_p16",
      "full name": "uint8x16_t vreinterpretq_u8_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u16_p16",
      "full name": "uint16x8_t vreinterpretq_u16_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u32_p16",
      "full name": "uint32x4_t vreinterpretq_u32_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p8_p16",
      "full name": "poly8x16_t vreinterpretq_p8_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u64_p16",
      "full name": "uint64x2_t vreinterpretq_u64_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s64_p16",
      "full name": "int64x2_t vreinterpretq_s64_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f64_p16",
      "full name": "float64x2_t vreinterpretq_f64_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p64_p16",
      "full name": "poly64x2_t vreinterpretq_p64_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p128_p16",
      "full name": "poly128_t vreinterpretq_p128_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f16_p16",
      "full name": "float16x8_t vreinterpretq_f16_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s8_u64",
      "full name": "int8x16_t vreinterpretq_s8_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s16_u64",
      "full name": "int16x8_t vreinterpretq_s16_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s32_u64",
      "full name": "int32x4_t vreinterpretq_s32_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f32_u64",
      "full name": "float32x4_t vreinterpretq_f32_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u8_u64",
      "full name": "uint8x16_t vreinterpretq_u8_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u16_u64",
      "full name": "uint16x8_t vreinterpretq_u16_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u32_u64",
      "full name": "uint32x4_t vreinterpretq_u32_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p8_u64",
      "full name": "poly8x16_t vreinterpretq_p8_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p16_u64",
      "full name": "poly16x8_t vreinterpretq_p16_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s64_u64",
      "full name": "int64x2_t vreinterpretq_s64_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f64_u64",
      "full name": "float64x2_t vreinterpretq_f64_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f64_s64",
      "full name": "float64x2_t vreinterpretq_f64_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p64_s64",
      "full name": "poly64x2_t vreinterpretq_p64_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p128_s64",
      "full name": "poly128_t vreinterpretq_p128_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p64_u64",
      "full name": "poly64x2_t vreinterpretq_p64_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p128_u64",
      "full name": "poly128_t vreinterpretq_p128_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f16_u64",
      "full name": "float16x8_t vreinterpretq_f16_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s8_s64",
      "full name": "int8x16_t vreinterpretq_s8_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s16_s64",
      "full name": "int16x8_t vreinterpretq_s16_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s32_s64",
      "full name": "int32x4_t vreinterpretq_s32_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f32_s64",
      "full name": "float32x4_t vreinterpretq_f32_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u8_s64",
      "full name": "uint8x16_t vreinterpretq_u8_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u16_s64",
      "full name": "uint16x8_t vreinterpretq_u16_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u32_s64",
      "full name": "uint32x4_t vreinterpretq_u32_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p8_s64",
      "full name": "poly8x16_t vreinterpretq_p8_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p16_s64",
      "full name": "poly16x8_t vreinterpretq_p16_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u64_s64",
      "full name": "uint64x2_t vreinterpretq_u64_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f16_s64",
      "full name": "float16x8_t vreinterpretq_f16_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s8_f16",
      "full name": "int8x16_t vreinterpretq_s8_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s16_f16",
      "full name": "int16x8_t vreinterpretq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s32_f16",
      "full name": "int32x4_t vreinterpretq_s32_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f32_f16",
      "full name": "float32x4_t vreinterpretq_f32_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u8_f16",
      "full name": "uint8x16_t vreinterpretq_u8_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u16_f16",
      "full name": "uint16x8_t vreinterpretq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u32_f16",
      "full name": "uint32x4_t vreinterpretq_u32_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p8_f16",
      "full name": "poly8x16_t vreinterpretq_p8_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p16_f16",
      "full name": "poly16x8_t vreinterpretq_p16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_u64_f16",
      "full name": "uint64x2_t vreinterpretq_u64_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_s64_f16",
      "full name": "int64x2_t vreinterpretq_s64_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_f64_f16",
      "full name": "float64x2_t vreinterpretq_f64_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p64_f16",
      "full name": "poly64x2_t vreinterpretq_p64_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpretq_p128_f16",
      "full name": "poly128_t vreinterpretq_p128_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [16]",
      "function_cn": "[向量] 空操作  [16]"
    },
    {
      "name": "vreinterpret_s8_f64",
      "full name": "int8x8_t vreinterpret_s8_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s16_f64",
      "full name": "int16x4_t vreinterpret_s16_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s32_f64",
      "full name": "int32x2_t vreinterpret_s32_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u8_f64",
      "full name": "uint8x8_t vreinterpret_u8_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u16_f64",
      "full name": "uint16x4_t vreinterpret_u16_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u32_f64",
      "full name": "uint32x2_t vreinterpret_u32_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p8_f64",
      "full name": "poly8x8_t vreinterpret_p8_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p16_f64",
      "full name": "poly16x4_t vreinterpret_p16_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u64_f64",
      "full name": "uint64x1_t vreinterpret_u64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s64_f64",
      "full name": "int64x1_t vreinterpret_s64_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f16_f64",
      "full name": "float16x4_t vreinterpret_f16_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f32_f64",
      "full name": "float32x2_t vreinterpret_f32_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s8_f64",
      "full name": "int8x16_t vreinterpretq_s8_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s16_f64",
      "full name": "int16x8_t vreinterpretq_s16_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s32_f64",
      "full name": "int32x4_t vreinterpretq_s32_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u8_f64",
      "full name": "uint8x16_t vreinterpretq_u8_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u16_f64",
      "full name": "uint16x8_t vreinterpretq_u16_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u32_f64",
      "full name": "uint32x4_t vreinterpretq_u32_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p8_f64",
      "full name": "poly8x16_t vreinterpretq_p8_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p16_f64",
      "full name": "poly16x8_t vreinterpretq_p16_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u64_f64",
      "full name": "uint64x2_t vreinterpretq_u64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s64_f64",
      "full name": "int64x2_t vreinterpretq_s64_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f16_f64",
      "full name": "float16x8_t vreinterpretq_f16_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f32_f64",
      "full name": "float32x4_t vreinterpretq_f32_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s8_p64",
      "full name": "int8x8_t vreinterpret_s8_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s16_p64",
      "full name": "int16x4_t vreinterpret_s16_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s32_p64",
      "full name": "int32x2_t vreinterpret_s32_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u8_p64",
      "full name": "uint8x8_t vreinterpret_u8_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u16_p64",
      "full name": "uint16x4_t vreinterpret_u16_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u32_p64",
      "full name": "uint32x2_t vreinterpret_u32_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p8_p64",
      "full name": "poly8x8_t vreinterpret_p8_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_p16_p64",
      "full name": "poly16x4_t vreinterpret_p16_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_u64_p64",
      "full name": "uint64x1_t vreinterpret_u64_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_s64_p64",
      "full name": "int64x1_t vreinterpret_s64_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f64_p64",
      "full name": "float64x1_t vreinterpret_f64_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpret_f16_p64",
      "full name": "float16x4_t vreinterpret_f16_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s8_p64",
      "full name": "int8x16_t vreinterpretq_s8_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s16_p64",
      "full name": "int16x8_t vreinterpretq_s16_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s32_p64",
      "full name": "int32x4_t vreinterpretq_s32_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u8_p64",
      "full name": "uint8x16_t vreinterpretq_u8_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u16_p64",
      "full name": "uint16x8_t vreinterpretq_u16_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u32_p64",
      "full name": "uint32x4_t vreinterpretq_u32_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p8_p64",
      "full name": "poly8x16_t vreinterpretq_p8_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_p16_p64",
      "full name": "poly16x8_t vreinterpretq_p16_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_u64_p64",
      "full name": "uint64x2_t vreinterpretq_u64_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s64_p64",
      "full name": "int64x2_t vreinterpretq_s64_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f64_p64",
      "full name": "float64x2_t vreinterpretq_f64_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_f16_p64",
      "full name": "float16x8_t vreinterpretq_f16_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [64]",
      "function_cn": "[向量] 空操作  [64]"
    },
    {
      "name": "vreinterpretq_s8_p128",
      "full name": "int8x16_t vreinterpretq_s8_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_s16_p128",
      "full name": "int16x8_t vreinterpretq_s16_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_s32_p128",
      "full name": "int32x4_t vreinterpretq_s32_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_u8_p128",
      "full name": "uint8x16_t vreinterpretq_u8_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_u16_p128",
      "full name": "uint16x8_t vreinterpretq_u16_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_u32_p128",
      "full name": "uint32x4_t vreinterpretq_u32_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_p8_p128",
      "full name": "poly8x16_t vreinterpretq_p8_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_p16_p128",
      "full name": "poly16x8_t vreinterpretq_p16_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_u64_p128",
      "full name": "uint64x2_t vreinterpretq_u64_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_s64_p128",
      "full name": "int64x2_t vreinterpretq_s64_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_f64_p128",
      "full name": "float64x2_t vreinterpretq_f64_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vreinterpretq_f16_p128",
      "full name": "float16x8_t vreinterpretq_f16_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "NOP",
      "function_en": "[vector] NOP  [128]",
      "function_cn": "[向量] 空操作  [128]"
    },
    {
      "name": "vldrq_p128",
      "full name": "poly128_t vldrq_p128(poly128_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "ldr",
      "function_en": "[vector] ldr [128]",
      "function_cn": "[向量] 加载指针指向的内存中的数据 [128]"
    },
    {
      "name": "vstrq_p128",
      "full name": "void vstrq_p128(poly128_t * ptr, poly128_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "str",
      "function_en": "[vector] str [128]",
      "function_cn": "[向量] 将寄存器中的数据存储到指针指向的内存中 [128]"
    },
    {
      "name": "vaeseq_u8",
      "full name": "uint8x16_t vaeseq_u8(uint8x16_t data, uint8x16_t key)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "aese",
      "function_en": "[vector] aese [8]",
      "function_cn": "[向量] 单轮加密 [8]"
    },
    {
      "name": "vaesdq_u8",
      "full name": "uint8x16_t vaesdq_u8(uint8x16_t data, uint8x16_t key)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "aesd",
      "function_en": "[vector] aesd [8]",
      "function_cn": "[向量] 单轮解密 [8]"
    },
    {
      "name": "vaesmcq_u8",
      "full name": "uint8x16_t vaesmcq_u8(uint8x16_t data)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "aesmc",
      "function_en": "[vector] aesmc [8]",
      "function_cn": "[向量] 混合列加密 [8]"
    },
    {
      "name": "vaesimcq_u8",
      "full name": "uint8x16_t vaesimcq_u8(uint8x16_t data)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "aesimc",
      "function_en": "[vector] aesimc [8]",
      "function_cn": "[向量] 反向混合列加密 [8]"
    },
    {
      "name": "vsha1cq_u32",
      "full name": "uint32x4_t vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1c",
      "function_en": "[vector] sha1c [32]",
      "function_cn": "[向量] 哈希更新(选择)加密 [32]"
    },
    {
      "name": "vsha1pq_u32",
      "full name": "uint32x4_t vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1p",
      "function_en": "[vector] sha1p [32]",
      "function_cn": "[向量] 哈希更新(奇偶校验)加密 [32]"
    },
    {
      "name": "vsha1mq_u32",
      "full name": "uint32x4_t vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1m",
      "function_en": "[vector] sha1m [32]",
      "function_cn": "[向量] 哈希更新(多数)加密 [32]"
    },
    {
      "name": "vsha1h_u32",
      "full name": "uint32_t vsha1h_u32(uint32_t hash_e)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1h",
      "function_en": "[vector] sha1h [32]",
      "function_cn": "[向量] 固定旋转加密 [32]"
    },
    {
      "name": "vsha1su0q_u32",
      "full name": "uint32x4_t vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1su0",
      "function_en": "[vector] sha1su0 [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vsha1su1q_u32",
      "full name": "uint32x4_t vsha1su1q_u32(uint32x4_t tw0_3, uint32x4_t w12_15)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha1su1",
      "function_en": "[vector] sha1su1 [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vsha256hq_u32",
      "full name": "uint32x4_t vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha256h",
      "function_en": "[vector] sha256h [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vsha256h2q_u32",
      "full name": "uint32x4_t vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha256h2",
      "function_en": "[vector] sha256h2 [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vsha256su0q_u32",
      "full name": "uint32x4_t vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha256su0",
      "function_en": "[vector] sha256su0 [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vsha256su1q_u32",
      "full name": "uint32x4_t vsha256su1q_u32(uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "sha256su1",
      "function_en": "[vector] sha256su1 [32]",
      "function_cn": "[向量] 加密 [32]"
    },
    {
      "name": "vmull_p64",
      "full name": "poly128_t vmull_p64(poly64_t a, poly64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmull",
      "function_en": "[vector] pmull [64]",
      "function_cn": "[向量] 多项式乘法 [64]"
    },
    {
      "name": "vmull_high_p64",
      "full name": "poly128_t vmull_high_p64(poly64x2_t a, poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "pmull2",
      "function_en": "[vector] pmull2 [64]",
      "function_cn": "[向量] 多项式乘法 [64]"
    },
    {
      "name": "vcls_u8",
      "full name": "int8x8_t vcls_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vclsq_u8",
      "full name": "int8x16_t vclsq_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vcls_u16",
      "full name": "int16x4_t vcls_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vclsq_u16",
      "full name": "int16x8_t vclsq_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vcls_u32",
      "full name": "int32x2_t vcls_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vclsq_u32",
      "full name": "int32x4_t vclsq_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.",
      "function_cn": "计数前导符号位（向量）。此指令计算源SIMD&FP寄存器中每个向量元素中与最高有效位相同的最高有效位之后的连续位数，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。计数不包括最高有效位本身。"
    },
    {
      "name": "vadd_p8",
      "full name": "poly8x8_t vadd_p8(poly8x8_t a,poly8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vadd_p16",
      "full name": "poly16x4_t vadd_p16(poly16x4_t a,poly16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vadd_p64",
      "full name": "poly64x1_t vadd_p64(poly64x1_t a,poly64x1_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vaddq_p8",
      "full name": "poly8x16_t vaddq_p8(poly8x16_t a,poly8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vaddq_p16",
      "full name": "poly16x8_t vaddq_p16(poly16x8_t a,poly16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vaddq_p64",
      "full name": "poly64x2_t vaddq_p64(poly64x2_t a,poly64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "vaddq_p128",
      "full name": "poly128_t vaddq_p128(poly128_t a,poly128_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.",
      "function_cn": "按位异或（向量）。此指令在两个源SIMD和FP寄存器之间执行按位异或操作，并将结果放置在目标SIMD和FP寄存器中。"
    },
    {
      "name": "__crc32b",
      "full name": "uint32_t __crc32b(uint32_t a,uint8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x04C11DB7 用于 CRC 计算。"
    },
    {
      "name": "__crc32h",
      "full name": "uint32_t __crc32h(uint32_t a,uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x04C11DB7 用于 CRC 计算。"
    },
    {
      "name": "__crc32w",
      "full name": "uint32_t __crc32w(uint32_t a,uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x04C11DB7 用于 CRC 计算。"
    },
    {
      "name": "__crc32d",
      "full name": "uint32_t __crc32d(uint32_t a,uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x04C11DB7 用于 CRC 计算。"
    },
    {
      "name": "__crc32cb",
      "full name": "uint32_t __crc32cb(uint32_t a,uint8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x1EDC6F41 用于 CRC 计算。"
    },
    {
      "name": "__crc32ch",
      "full name": "uint32_t __crc32ch(uint32_t a,uint16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x1EDC6F41 用于 CRC 计算。"
    },
    {
      "name": "__crc32cw",
      "full name": "uint32_t __crc32cw(uint32_t a,uint32_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x1EDC6F41 用于 CRC 计算。"
    },
    {
      "name": "__crc32cd",
      "full name": "uint32_t __crc32cd(uint32_t a,uint64_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.",
      "function_cn": "CRC32校验和对通用寄存器中保存的值执行循环冗余校验（CRC）计算。它在第一个源操作数中获取输入CRC值，对第二个源操作数中的输入值执行CRC，并返回输出CRC值。第二个源操作数可以是8、16、32或64位。为了与常见用法保持一致，值的位顺序作为运算的一部分被颠倒，并且多项式 0x1EDC6F41 用于 CRC 计算。"
    },
    {
      "name": "vqrdmlah_s16",
      "full name": "int16x4_t vqrdmlah_s16(int16x4_t a,int16x4_t b,int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlah_s32",
      "full name": "int32x2_t vqrdmlah_s32(int32x2_t a,int32x2_t b,int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_s16",
      "full name": "int16x8_t vqrdmlahq_s16(int16x8_t a,int16x8_t b,int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_s32",
      "full name": "int32x4_t vqrdmlahq_s32(int32x4_t a,int32x4_t b,int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlsh_s16",
      "full name": "int16x4_t vqrdmlsh_s16(int16x4_t a,int16x4_t b,int16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlsh_s32",
      "full name": "int32x2_t vqrdmlsh_s32(int32x2_t a,int32x2_t b,int32x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_s16",
      "full name": "int16x8_t vqrdmlshq_s16(int16x8_t a,int16x8_t b,int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_s32",
      "full name": "int32x4_t vqrdmlshq_s32(int32x4_t a,int32x4_t b,int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlah_lane_s16",
      "full name": "int16x4_t vqrdmlah_lane_s16(int16x4_t a,int16x4_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_lane_s16",
      "full name": "int16x8_t vqrdmlahq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlah_laneq_s16",
      "full name": "int16x4_t vqrdmlah_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_laneq_s16",
      "full name": "int16x8_t vqrdmlahq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlah_lane_s32",
      "full name": "int32x2_t vqrdmlah_lane_s32(int32x2_t a,int32x2_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_lane_s32",
      "full name": "int32x4_t vqrdmlahq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlah_laneq_s32",
      "full name": "int32x2_t vqrdmlah_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahq_laneq_s32",
      "full name": "int32x4_t vqrdmlahq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlsh_lane_s16",
      "full name": "int16x4_t vqrdmlsh_lane_s16(int16x4_t a,int16x4_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_lane_s16",
      "full name": "int16x8_t vqrdmlshq_lane_s16(int16x8_t a,int16x8_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlsh_laneq_s16",
      "full name": "int16x4_t vqrdmlsh_laneq_s16(int16x4_t a,int16x4_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_laneq_s16",
      "full name": "int16x8_t vqrdmlshq_laneq_s16(int16x8_t a,int16x8_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlsh_lane_s32",
      "full name": "int32x2_t vqrdmlsh_lane_s32(int32x2_t a,int32x2_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_lane_s32",
      "full name": "int32x4_t vqrdmlshq_lane_s32(int32x4_t a,int32x4_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlsh_laneq_s32",
      "full name": "int32x2_t vqrdmlsh_laneq_s32(int32x2_t a,int32x2_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshq_laneq_s32",
      "full name": "int32x4_t vqrdmlshq_laneq_s32(int32x4_t a,int32x4_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlahh_s16",
      "full name": "int16_t vqrdmlahh_s16(int16_t a,int16_t b,int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlahs_s32",
      "full name": "int32_t vqrdmlahs_s32(int32_t a,int32_t b,int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshh_s16",
      "full name": "int16_t vqrdmlshh_s16(int16_t a,int16_t b,int16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshs_s32",
      "full name": "int32_t vqrdmlshs_s32(int32_t a,int32_t b,int32_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlahh_lane_s16",
      "full name": "int16_t vqrdmlahh_lane_s16(int16_t a,int16_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahh_laneq_s16",
      "full name": "int16_t vqrdmlahh_laneq_s16(int16_t a,int16_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahs_lane_s32",
      "full name": "int32_t vqrdmlahs_lane_s32(int32_t a,int32_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlahs_laneq_s32",
      "full name": "int32_t vqrdmlahs_laneq_s32(int32_t a,int32_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘法累加返回高半（向量）。该指令将第一源SIMD&FP寄存器的向量元素与第二源SIMD&FP寄存器的相应向量元素相乘，而不使乘法结果饱和，将结果加倍，并将最终结果的最高有效一半与目标SIMD&FP 寄存器的向量元素相加。结果将四舍五入。"
    },
    {
      "name": "vqrdmlshh_lane_s16",
      "full name": "int16_t vqrdmlshh_lane_s16(int16_t a,int16_t b,int16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshh_laneq_s16",
      "full name": "int16_t vqrdmlshh_laneq_s16(int16_t a,int16_t b,int16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshs_lane_s32",
      "full name": "int32_t vqrdmlshs_lane_s32(int32_t a,int32_t b,int32x2_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vqrdmlshs_laneq_s32",
      "full name": "int32_t vqrdmlshs_laneq_s32(int32_t a,int32_t b,int32x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.",
      "function_cn": "带符号的饱和舍入加倍乘减返回高半（向量）。该指令将第一个源 SIMD&FP 寄存器的向量元素与第二个源 SIMD&FP 寄存器的相应向量元素相乘，而不会使乘法结果饱和，将结果加倍，并从目标的向量元素中减去最终结果的最高有效一半SIMD&FP 寄存器。结果四舍五入。"
    },
    {
      "name": "vabsh_f16",
      "full name": "float16_t vabsh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点绝对值（向量）。此指令计算源SIMD&FP寄存器中每个向量元素的绝对值，将结果写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vceqzh_f16",
      "full name": "uint16_t vceqzh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于零（向量）。此指令读取源SIMD&FP寄存器中的每个浮点值，如果该值等于零，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgezh_f16",
      "full name": "uint16_t vcgezh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值大于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcgtzh_f16",
      "full name": "uint16_t vcgtzh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于零（向量）。此指令读取源SIMD&FP寄存器中的每个浮点值，如果该值大于零，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vclezh_f16",
      "full name": "uint16_t vclezh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值小于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为零。"
    },
    {
      "name": "vcltzh_f16",
      "full name": "uint16_t vcltzh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于零（向量）。此指令读取源SIMD&FP寄存器中的每个浮点值，如果该值小于零，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcvth_f16_s16",
      "full name": "float16_t vcvth_f16_s16(int16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_f16_s32",
      "full name": "float16_t vcvth_f16_s32(int32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_f16_s64",
      "full name": "float16_t vcvth_f16_s64(int64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_f16_u16",
      "full name": "float16_t vcvth_f16_u16(uint16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_f16_u32",
      "full name": "float16_t vcvth_f16_u32(uint32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_f16_u64",
      "full name": "float16_t vcvth_f16_u64(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_s16_f16",
      "full name": "int16_t vcvth_s16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_s32_f16",
      "full name": "int32_t vcvth_s32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_s64_f16",
      "full name": "int64_t vcvth_s64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_u16_f16",
      "full name": "uint16_t vcvth_u16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvth_u32_f16",
      "full name": "uint32_t vcvth_u32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvth_u64_f16",
      "full name": "uint64_t vcvth_u64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvtah_s16_f16",
      "full name": "int16_t vcvtah_s16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtah_s32_f16",
      "full name": "int32_t vcvtah_s32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtah_s64_f16",
      "full name": "int64_t vcvtah_s64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtah_u16_f16",
      "full name": "uint16_t vcvtah_u16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtah_u32_f16",
      "full name": "uint32_t vcvtah_u32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtah_u64_f16",
      "full name": "uint64_t vcvtah_u64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_s16_f16",
      "full name": "int16_t vcvtmh_s16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_s32_f16",
      "full name": "int32_t vcvtmh_s32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_s64_f16",
      "full name": "int64_t vcvtmh_s64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_u16_f16",
      "full name": "uint16_t vcvtmh_u16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_u32_f16",
      "full name": "uint32_t vcvtmh_u32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmh_u64_f16",
      "full name": "uint64_t vcvtmh_u64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_s16_f16",
      "full name": "int16_t vcvtnh_s16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_s32_f16",
      "full name": "int32_t vcvtnh_s32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_s64_f16",
      "full name": "int64_t vcvtnh_s64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_u16_f16",
      "full name": "uint16_t vcvtnh_u16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_u32_f16",
      "full name": "uint32_t vcvtnh_u32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnh_u64_f16",
      "full name": "uint64_t vcvtnh_u64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_s16_f16",
      "full name": "int16_t vcvtph_s16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_s32_f16",
      "full name": "int32_t vcvtph_s32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_s64_f16",
      "full name": "int64_t vcvtph_s64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_u16_f16",
      "full name": "uint16_t vcvtph_u16_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_u32_f16",
      "full name": "uint32_t vcvtph_u32_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtph_u64_f16",
      "full name": "uint64_t vcvtph_u64_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vnegh_f16",
      "full name": "float16_t vnegh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点取反（向量）。该指令将源 SIMD&FP 寄存器中每个向量元素的值取反，将结果写入向量，并将向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vrecpeh_f16",
      "full name": "float16_t vrecpeh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数估值。此指令为源SIMD&FP寄存器中的每个向量元素查找近似倒数估计值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrecpxh_f16",
      "full name": "float16_t vrecpxh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数指数（标量）。此指令为源SIMD&FP寄存器中的每个向量元素查找近似倒数指数，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrndh_f16",
      "full name": "float16_t vrndh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点朝零舍入到整数（向量）。此指令使用向零舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndah_f16",
      "full name": "float16_t vrndah_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，舍入到与 Away（向量）相关的最近值。此指令将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndih_f16",
      "full name": "float16_t vrndih_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "使用当前舍入模式（向量）将浮点舍入为整数。此指令使用FPCR指定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndmh_f16",
      "full name": "float16_t vrndmh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝向负无穷大（向量）。此指令使用向最小值舍入模式将SIMD&FP源寄存器中的浮点值向向量舍入为大小相同的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndnh_f16",
      "full name": "float16_t vrndnh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，到最接近的偶数（向量）。此指令使用舍入到最近值模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndph_f16",
      "full name": "float16_t vrndph_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝正无穷大（向量）方向。此指令使用向正无限舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndxh_f16",
      "full name": "float16_t vrndxh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点精确舍入到整数，使用当前舍入模式（向量）。此指令使用FPCR指定定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrsqrteh_f16",
      "full name": "float16_t vrsqrteh_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根估值。此指令为源SIMD&FP寄存器中的每个向量元素计算近似平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsqrth_f16",
      "full name": "float16_t vsqrth_f16(float16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点平方根（向量）。此指令计算源SIMD&FP寄存器中每个向量元素的平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vaddh_f16",
      "full name": "float16_t vaddh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点添加（向量）。此指令在两个源SIMD&FP寄存器中添加相应的向量元素，将结果写入向量，并将向量写入目标SIMD&FP寄存器。此指令中的所有值都是浮点值。"
    },
    {
      "name": "vabdh_f16",
      "full name": "float16_t vabdh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点绝对差值（向量）。此指令从第一源SIMD&FP寄存器元素中的相应浮点值中减去第二源SIMD&FP寄存器元素中的浮点值，将每个结果的绝对值放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vcageh_f16",
      "full name": "uint16_t vcageh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于或等于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcagth_f16",
      "full name": "uint16_t vcagth_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcaleh_f16",
      "full name": "uint16_t vcaleh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than or equal",
      "function_cn": "浮点绝对比较小于或等于"
    },
    {
      "name": "vcalth_f16",
      "full name": "uint16_t vcalth_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than",
      "function_cn": "浮点绝对比较小于"
    },
    {
      "name": "vceqh_f16",
      "full name": "uint16_t vceqh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register, with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于（向量）。此指令将第一源SIMD&FP寄存器中的每个浮点值与第二源SIMD&FP寄存器中的相应浮点值进行比较，如果比较相等，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgeh_f16",
      "full name": "uint16_t vcgeh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于或等于第二个源SIMD&FP寄存器中相应的浮点值，则设置目标SIMD&FP中相应向量元素的每个位寄存器为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgth_f16",
      "full name": "uint16_t vcgth_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于第二个源SIMD&FP寄存器中的相应浮点值，则将目标SIMD&FP寄存器中相应向量元素的每个位设置为1，否则，将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcleh_f16",
      "full name": "uint16_t vcleh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than or equal",
      "function_cn": "浮点比较小于或等于"
    },
    {
      "name": "vclth_f16",
      "full name": "uint16_t vclth_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than",
      "function_cn": "浮点比较小于"
    },
    {
      "name": "vcvth_n_f16_s16",
      "full name": "float16_t vcvth_n_f16_s16(int16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_f16_s32",
      "full name": "float16_t vcvth_n_f16_s32(int32_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_f16_s64",
      "full name": "float16_t vcvth_n_f16_s64(int64_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_f16_u16",
      "full name": "float16_t vcvth_n_f16_u16(uint16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_f16_u32",
      "full name": "float16_t vcvth_n_f16_u32(uint32_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_f16_u64",
      "full name": "float16_t vcvth_n_f16_u64(uint64_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_s16_f16",
      "full name": "int16_t vcvth_n_s16_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_s32_f16",
      "full name": "int32_t vcvth_n_s32_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_s64_f16",
      "full name": "int64_t vcvth_n_s64_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvth_n_u16_f16",
      "full name": "uint16_t vcvth_n_u16_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvth_n_u32_f16",
      "full name": "uint32_t vcvth_n_u32_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvth_n_u64_f16",
      "full name": "uint64_t vcvth_n_u64_f16(float16_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vdivh_f16",
      "full name": "float16_t vdivh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点除法（向量）。此指令将第一源SIMD&FP寄存器中元素中的浮点值除以第二源SIMD&FP寄存器中相应元素中的浮点值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxh_f16",
      "full name": "float16_t vmaxh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大值（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxnmh_f16",
      "full name": "float16_t vmaxnmh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vminh_f16",
      "full name": "float16_t vminh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小值（向量）。此指令比较两个源SIMD&FP寄存器中向量中的相应元素，将两个浮点值中较小的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vminnmh_f16",
      "full name": "float16_t vminnmh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较小的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulh_f16",
      "full name": "float16_t vmulh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxh_f16",
      "full name": "float16_t vmulxh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrecpsh_f16",
      "full name": "float16_t vrecpsh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从2.0中减去每个乘积，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrsqrtsh_f16",
      "full name": "float16_t vrsqrtsh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从3.0中减去每个乘积，将这些结果除以2.0，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsubh_f16",
      "full name": "float16_t vsubh_f16(float16_t a,float16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点减法（向量）。此指令从第一源SIMD&FP寄存器中向量中的相应元素中减去第二源SIMD&FP寄存器中向量中的元素，将每个结果放入向量的元素中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vfmah_f16",
      "full name": "float16_t vfmah_f16(float16_t a,float16_t b,float16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点融合乘加（标量）。该指令将前两个 SIMD&FP 源寄存器的值相乘，将乘积与第三个 SIMD&FP 源寄存器的值相加，并将结果写入 SIMD&FP 目标寄存器。"
    },
    {
      "name": "vfmsh_f16",
      "full name": "float16_t vfmsh_f16(float16_t a,float16_t b,float16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点融合乘减（标量）。此指令将前两个SIMD&FP源寄存器的值相乘，取反乘积，将其添加到第三个SIMD&FP源寄存器的值中，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vabs_f16",
      "full name": "float16x4_t vabs_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "成对浮点相加（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值相加，将结果放入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vabsq_f16",
      "full name": "float16x8_t vabsq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "成对浮点相加（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值相加，将结果放入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vceqz_f16",
      "full name": "uint16x4_t vceqz_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于零（向量）。此指令读取源SIMD&FP寄存器中的每个浮点值，如果该值等于零，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vceqzq_f16",
      "full name": "uint16x8_t vceqzq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于零（向量）。此指令读取源SIMD&FP寄存器中的每个浮点值，如果该值等于零，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgez_f16",
      "full name": "uint16x4_t vcgez_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值大于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcgezq_f16",
      "full name": "uint16x8_t vcgezq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值大于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcgtz_f16",
      "full name": "uint16x4_t vcgtz_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值大于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcgtzq_f16",
      "full name": "uint16x8_t vcgtzq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值大于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vclez_f16",
      "full name": "uint16x4_t vclez_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值小于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vclezq_f16",
      "full name": "uint16x8_t vclezq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值小于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcltz_f16",
      "full name": "uint16x4_t vcltz_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值小于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcltzq_f16",
      "full name": "uint16x8_t vcltzq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较小于或等于零（向量）。该指令读取源 SIMD&FP 寄存器中的每个浮点值，如果该值小于或等于 0，则将目标 SIMD&FP 寄存器中相应向量元素的每一位设置为 1，否则将相应向量元素的每一位设置为目标 SIMD&FP 寄存器为0。"
    },
    {
      "name": "vcvt_f16_s16",
      "full name": "float16x4_t vcvt_f16_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_f16_s16",
      "full name": "float16x8_t vcvtq_f16_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_f16_u16",
      "full name": "float16x4_t vcvt_f16_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_f16_u16",
      "full name": "float16x8_t vcvtq_f16_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_s16_f16",
      "full name": "int16x4_t vcvt_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_s16_f16",
      "full name": "int16x8_t vcvtq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_u16_f16",
      "full name": "uint16x4_t vcvt_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_u16_f16",
      "full name": "uint16x8_t vcvtq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvta_s16_f16",
      "full name": "int16x4_t vcvta_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtaq_s16_f16",
      "full name": "int16x8_t vcvtaq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvta_u16_f16",
      "full name": "uint16x4_t vcvta_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtaq_u16_f16",
      "full name": "uint16x8_t vcvtaq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到与 Away（向量）相关的最近值。此指令使用四舍五入模式将向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtm_s16_f16",
      "full name": "int16x4_t vcvtm_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmq_s16_f16",
      "full name": "int16x8_t vcvtmq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtm_u16_f16",
      "full name": "uint16x4_t vcvtm_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtmq_u16_f16",
      "full name": "uint16x8_t vcvtmq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向负无穷大（向量）舍入。此指令使用向负无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtn_s16_f16",
      "full name": "int16x4_t vcvtn_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnq_s16_f16",
      "full name": "int16x8_t vcvtnq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtn_u16_f16",
      "full name": "uint16x4_t vcvtn_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtnq_u16_f16",
      "full name": "uint16x8_t vcvtnq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，舍入到最接近的偶数（向量）。此指令使用舍入到最近值舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtp_s16_f16",
      "full name": "int16x4_t vcvtp_s16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtpq_s16_f16",
      "full name": "int16x8_t vcvtpq_s16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为有符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtp_u16_f16",
      "full name": "uint16x4_t vcvtp_u16_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtpq_u16_f16",
      "full name": "uint16x8_t vcvtpq_u16_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为无符号整数，向正无穷大（向量）舍入。此指令使用向正无限舍入模式将标量或向量中的每个元素从浮点值转换为无符号整数值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vneg_f16",
      "full name": "float16x4_t vneg_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点取反（向量）。该指令将源 SIMD&FP 寄存器中每个向量元素的值取反，将结果写入向量，并将向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vnegq_f16",
      "full name": "float16x8_t vnegq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点取反（向量）。该指令将源 SIMD&FP 寄存器中每个向量元素的值取反，将结果写入向量，并将向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vrecpe_f16",
      "full name": "float16x4_t vrecpe_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数估值。此指令为源SIMD&FP寄存器中的每个向量元素查找近似倒数估计值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrecpeq_f16",
      "full name": "float16x8_t vrecpeq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数估值。此指令为源SIMD&FP寄存器中的每个向量元素查找近似倒数估计值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrnd_f16",
      "full name": "float16x4_t vrnd_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点朝零舍入到整数（向量）。此指令使用向零舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndq_f16",
      "full name": "float16x8_t vrndq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点朝零舍入到整数（向量）。此指令使用向零舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrnda_f16",
      "full name": "float16x4_t vrnda_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，舍入到与 Away（向量）相关的最近值。此指令将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndaq_f16",
      "full name": "float16x8_t vrndaq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，舍入到与 Away（向量）相关的最近值。此指令将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndi_f16",
      "full name": "float16x4_t vrndi_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "使用当前舍入模式（向量）将浮点舍入为整数。此指令使用FPCR指定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndiq_f16",
      "full name": "float16x8_t vrndiq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "使用当前舍入模式（向量）将浮点舍入为整数。此指令使用FPCR指定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndm_f16",
      "full name": "float16x4_t vrndm_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝向负无穷大（向量）。此指令使用向最小值舍入模式将SIMD&FP源寄存器中的浮点值向向量舍入为大小相同的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndmq_f16",
      "full name": "float16x8_t vrndmq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝向负无穷大（向量）。此指令使用向最小值舍入模式将SIMD&FP源寄存器中的浮点值向向量舍入为大小相同的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndn_f16",
      "full name": "float16x4_t vrndn_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，到最接近的偶数（向量）。此指令使用舍入到最近值模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndnq_f16",
      "full name": "float16x8_t vrndnq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，到最接近的偶数（向量）。此指令使用舍入到最近值模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndp_f16",
      "full name": "float16x4_t vrndp_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝正无穷大（向量）方向。此指令使用向正无限舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndpq_f16",
      "full name": "float16x8_t vrndpq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点舍入到整数，朝正无穷大（向量）方向。此指令使用向正无限舍入模式将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndx_f16",
      "full name": "float16x4_t vrndx_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点精确舍入到整数，使用当前舍入模式（向量）。此指令使用FPCR指定定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrndxq_f16",
      "full name": "float16x8_t vrndxq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点精确舍入到整数，使用当前舍入模式（向量）。此指令使用FPCR指定定的舍入模式，将SIMD&FP源寄存器中的浮点值向量舍入为相同大小的整数浮点值，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vrsqrte_f16",
      "full name": "float16x4_t vrsqrte_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根估值。此指令为源SIMD&FP寄存器中的每个向量元素计算近似平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrsqrteq_f16",
      "full name": "float16x8_t vrsqrteq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根估值。此指令为源SIMD&FP寄存器中的每个向量元素计算近似平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsqrt_f16",
      "full name": "float16x4_t vsqrt_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点平方根（向量）。此指令计算源SIMD&FP寄存器中每个向量元素的平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsqrtq_f16",
      "full name": "float16x8_t vsqrtq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点平方根（向量）。此指令计算源SIMD&FP寄存器中每个向量元素的平方根，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vadd_f16",
      "full name": "float16x4_t vadd_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点添加（向量）。此指令在两个源SIMD&FP寄存器中添加相应的向量元素，将结果写入向量，并将向量写入目标SIMD&FP寄存器。此指令中的所有值都是浮点值。"
    },
    {
      "name": "vaddq_f16",
      "full name": "float16x8_t vaddq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点添加（向量）。此指令在两个源SIMD&FP寄存器中添加相应的向量元素，将结果写入向量，并将向量写入目标SIMD&FP寄存器。此指令中的所有值都是浮点值。"
    },
    {
      "name": "vabd_f16",
      "full name": "float16x4_t vabd_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点绝对差值（向量）。此指令从第一源SIMD&FP寄存器元素中的相应浮点值中减去第二源SIMD&FP寄存器元素中的浮点值，将每个结果的绝对值放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vabdq_f16",
      "full name": "float16x8_t vabdq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点绝对差值（向量）。此指令从第一源SIMD&FP寄存器元素中的相应浮点值中减去第二源SIMD&FP寄存器元素中的浮点值，将每个结果的绝对值放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vcage_f16",
      "full name": "uint16x4_t vcage_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于或等于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcageq_f16",
      "full name": "uint16x8_t vcageq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于或等于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcagt_f16",
      "full name": "uint16x4_t vcagt_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcagtq_f16",
      "full name": "uint16x8_t vcagtq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点绝对比较大于（向量）。该指令将第一个源SIMD&FP寄存器中每个浮点值的绝对值与第二个源SIMD&FP寄存器中相应浮点值的绝对值进行比较，如果第一个值大于或等于第二个值，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcale_f16",
      "full name": "uint16x4_t vcale_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than or equal",
      "function_cn": "浮点绝对比较小于或等于"
    },
    {
      "name": "vcaleq_f16",
      "full name": "uint16x8_t vcaleq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than or equal",
      "function_cn": "浮点绝对比较小于或等于"
    },
    {
      "name": "vcalt_f16",
      "full name": "uint16x4_t vcalt_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than",
      "function_cn": "浮点绝对比较小于"
    },
    {
      "name": "vcaltq_f16",
      "full name": "uint16x8_t vcaltq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point absolute compare less than",
      "function_cn": "浮点绝对比较小于"
    },
    {
      "name": "vceq_f16",
      "full name": "uint16x4_t vceq_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register, with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于（向量）。此指令将第一源SIMD&FP寄存器中的每个浮点值与第二源SIMD&FP寄存器中的相应浮点值进行比较，如果比较相等，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vceqq_f16",
      "full name": "uint16x8_t vceqq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register, with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较等于（向量）。此指令将第一源SIMD&FP寄存器中的每个浮点值与第二源SIMD&FP寄存器中的相应浮点值进行比较，如果比较相等，则将目标SIMD&FP寄存器中相应向量元素的每一位设置为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcge_f16",
      "full name": "uint16x4_t vcge_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于或等于第二个源SIMD&FP寄存器中相应的浮点值，则设置目标SIMD&FP中相应向量元素的每个位寄存器为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgeq_f16",
      "full name": "uint16x8_t vcgeq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于或等于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于或等于第二个源SIMD&FP寄存器中相应的浮点值，则设置目标SIMD&FP中相应向量元素的每个位寄存器为1，否则将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgt_f16",
      "full name": "uint16x4_t vcgt_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于第二个源SIMD&FP寄存器中的相应浮点值，则将目标SIMD&FP寄存器中相应向量元素的每个位设置为1，否则，将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcgtq_f16",
      "full name": "uint16x8_t vcgtq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.",
      "function_cn": "浮点比较大于（向量）。该指令读取第一个源SIMD&FP寄存器中的每个浮点值，如果该值大于第二个源SIMD&FP寄存器中的相应浮点值，则将目标SIMD&FP寄存器中相应向量元素的每个位设置为1，否则，将目标SIMD&FP寄存器中相应向量元素的每一位设置为0。"
    },
    {
      "name": "vcle_f16",
      "full name": "uint16x4_t vcle_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than or equal",
      "function_cn": "浮点比较小于或等于"
    },
    {
      "name": "vcleq_f16",
      "full name": "uint16x8_t vcleq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than or equal",
      "function_cn": "浮点比较小于或等于"
    },
    {
      "name": "vclt_f16",
      "full name": "uint16x4_t vclt_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than",
      "function_cn": "浮点比较小于"
    },
    {
      "name": "vcltq_f16",
      "full name": "uint16x8_t vcltq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point compare less than",
      "function_cn": "浮点比较小于"
    },
    {
      "name": "vcvt_n_f16_s16",
      "full name": "float16x4_t vcvt_n_f16_s16(int16x4_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_n_f16_s16",
      "full name": "float16x8_t vcvtq_n_f16_s16(int16x8_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "有符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_n_f16_u16",
      "full name": "float16x4_t vcvt_n_f16_u16(uint16x4_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_n_f16_u16",
      "full name": "float16x8_t vcvtq_n_f16_u16(uint16x8_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "无符号定点转换为浮点（向量）。此指令使用FPCR指定的舍入模式将向量中的每个元素从定点转换为浮点，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_n_s16_f16",
      "full name": "int16x4_t vcvt_n_s16_f16(float16x4_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvtq_n_s16_f16",
      "full name": "int16x8_t vcvtq_n_s16_f16(float16x8_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点转换为有符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为有符号定点整数，并将结果写入SIMD&FP目标寄存器。"
    },
    {
      "name": "vcvt_n_u16_f16",
      "full name": "uint16x4_t vcvt_n_u16_f16(float16x4_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vcvtq_n_u16_f16",
      "full name": "uint16x8_t vcvtq_n_u16_f16(float16x8_t a,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.",
      "function_cn": "浮点转换为无符号定点，向零舍入（向量）。此指令使用向零舍入模式将标量或向量中的每个元素从浮点转换为无符号定点整数，并将结果写入通用目标寄存器。"
    },
    {
      "name": "vdiv_f16",
      "full name": "float16x4_t vdiv_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点除法（向量）。此指令将第一源SIMD&FP寄存器中元素中的浮点值除以第二源SIMD&FP寄存器中相应元素中的浮点值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vdivq_f16",
      "full name": "float16x8_t vdivq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点除法（向量）。此指令将第一源SIMD&FP寄存器中元素中的浮点值除以第二源SIMD&FP寄存器中相应元素中的浮点值，将结果放置在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmax_f16",
      "full name": "float16x4_t vmax_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大值（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxq_f16",
      "full name": "float16x8_t vmaxq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大值（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxnm_f16",
      "full name": "float16x4_t vmaxnm_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxnmq_f16",
      "full name": "float16x8_t vmaxnmq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最大数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较大的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmin_f16",
      "full name": "float16x4_t vmin_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小值（向量）。此指令比较两个源SIMD&FP寄存器中向量中的相应元素，将两个浮点值中较小的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vminq_f16",
      "full name": "float16x8_t vminq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小值（向量）。此指令比较两个源SIMD&FP寄存器中向量中的相应元素，将两个浮点值中较小的一个放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vminnm_f16",
      "full name": "float16x4_t vminnm_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较小的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vminnmq_f16",
      "full name": "float16x8_t vminnmq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点最小数（向量）。此指令比较两个源SIMD&FP寄存器中的相应向量元素，将两个浮点值中较小的一个写入向量，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmul_f16",
      "full name": "float16x4_t vmul_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulq_f16",
      "full name": "float16x8_t vmulq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulx_f16",
      "full name": "float16x4_t vmulx_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxq_f16",
      "full name": "float16x8_t vmulxq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vpadd_f16",
      "full name": "float16x4_t vpadd_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "成对浮点相加（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值相加，将结果放入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpaddq_f16",
      "full name": "float16x8_t vpaddq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "成对浮点相加（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值相加，将结果放入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpmax_f16",
      "full name": "float16x4_t vpmax_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpmaxq_f16",
      "full name": "float16x8_t vpmaxq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpmaxnm_f16",
      "full name": "float16x4_t vpmaxnm_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，将每对值中的最大值写入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpmaxnmq_f16",
      "full name": "float16x8_t vpmaxnmq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，将每对值中的最大值写入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpmin_f16",
      "full name": "float16x4_t vpmin_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较小的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpminq_f16",
      "full name": "float16x8_t vpminq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较小的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpminnm_f16",
      "full name": "float16x4_t vpminnm_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，写入每对浮点中的最小元素点值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vpminnmq_f16",
      "full name": "float16x8_t vpminnmq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，写入每对浮点中的最小元素点值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vrecps_f16",
      "full name": "float16x4_t vrecps_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从2.0中减去每个乘积，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrecpsq_f16",
      "full name": "float16x8_t vrecpsq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从2.0中减去每个乘积，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrsqrts_f16",
      "full name": "float16x4_t vrsqrts_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从3.0中减去每个乘积，将这些结果除以2.0，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vrsqrtsq_f16",
      "full name": "float16x8_t vrsqrtsq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点倒数平方根步长。此指令将两个源SIMD和FP寄存器向量中相应的浮点值相乘，从3.0中减去每个乘积，将这些结果除以2.0，将结果放入向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsub_f16",
      "full name": "float16x4_t vsub_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点减法（向量）。此指令从第一源SIMD&FP寄存器中向量中的相应元素中减去第二源SIMD&FP寄存器中向量中的元素，将每个结果放入向量的元素中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vsubq_f16",
      "full name": "float16x8_t vsubq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点减法（向量）。此指令从第一源SIMD&FP寄存器中向量中的相应元素中减去第二源SIMD&FP寄存器中向量中的元素，将每个结果放入向量的元素中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vfma_f16",
      "full name": "float16x4_t vfma_f16(float16x4_t a,float16x4_t b,float16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmaq_f16",
      "full name": "float16x8_t vfmaq_f16(float16x8_t a,float16x8_t b,float16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfms_f16",
      "full name": "float16x4_t vfms_f16(float16x4_t a,float16x4_t b,float16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsq_f16",
      "full name": "float16x8_t vfmsq_f16(float16x8_t a,float16x8_t b,float16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfma_lane_f16",
      "full name": "float16x4_t vfma_lane_f16(float16x4_t a,float16x4_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmaq_lane_f16",
      "full name": "float16x8_t vfmaq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfma_laneq_f16",
      "full name": "float16x4_t vfma_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmaq_laneq_f16",
      "full name": "float16x8_t vfmaq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfma_n_f16",
      "full name": "float16x4_t vfma_n_f16(float16x4_t a,float16x4_t b,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmaq_n_f16",
      "full name": "float16x8_t vfmaq_n_f16(float16x8_t a,float16x8_t b,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmah_lane_f16",
      "full name": "float16_t vfmah_lane_f16(float16_t a,float16_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmah_laneq_f16",
      "full name": "float16_t vfmah_laneq_f16(float16_t a,float16_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "浮点融合乘加到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfms_lane_f16",
      "full name": "float16x4_t vfms_lane_f16(float16x4_t a,float16x4_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsq_lane_f16",
      "full name": "float16x8_t vfmsq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfms_laneq_f16",
      "full name": "float16x4_t vfms_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsq_laneq_f16",
      "full name": "float16x8_t vfmsq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfms_n_f16",
      "full name": "float16x4_t vfms_n_f16(float16x4_t a,float16x4_t b,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsq_n_f16",
      "full name": "float16x8_t vfmsq_n_f16(float16x8_t a,float16x8_t b,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsh_lane_f16",
      "full name": "float16_t vfmsh_lane_f16(float16_t a,float16_t b,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmsh_laneq_f16",
      "full name": "float16_t vfmsh_laneq_f16(float16_t a,float16_t b,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 从累加器（向量）中进行浮点融合乘减。该指令将两个源 SIMD&FP 寄存器中向量中对应的浮点值相乘，将乘积取反，将结果与目的 SIMD&FP 寄存器的相应向量元素相加，并将结果写入目的 SIMD&FP 寄存器。"
    },
    {
      "name": "vmul_lane_f16",
      "full name": "float16x4_t vmul_lane_f16(float16x4_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulq_lane_f16",
      "full name": "float16x8_t vmulq_lane_f16(float16x8_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmul_laneq_f16",
      "full name": "float16x4_t vmul_laneq_f16(float16x4_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulq_laneq_f16",
      "full name": "float16x8_t vmulq_laneq_f16(float16x8_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmul_n_f16",
      "full name": "float16x4_t vmul_n_f16(float16x4_t a,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulq_n_f16",
      "full name": "float16x8_t vmulq_n_f16(float16x8_t a,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulh_lane_f16",
      "full name": "float16_t vmulh_lane_f16(float16_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulh_laneq_f16",
      "full name": "float16_t vmulh_laneq_f16(float16_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法（向量）。此指令将两个源SIMD&FP寄存器中向量中相应的浮点值相乘，将结果放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulx_lane_f16",
      "full name": "float16x4_t vmulx_lane_f16(float16x4_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxq_lane_f16",
      "full name": "float16x8_t vmulxq_lane_f16(float16x8_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulx_laneq_f16",
      "full name": "float16x4_t vmulx_laneq_f16(float16x4_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxq_laneq_f16",
      "full name": "float16x8_t vmulxq_laneq_f16(float16x8_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulx_n_f16",
      "full name": "float16x4_t vmulx_n_f16(float16x4_t a,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxq_n_f16",
      "full name": "float16x8_t vmulxq_n_f16(float16x8_t a,float16_t n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxh_lane_f16",
      "full name": "float16_t vmulxh_lane_f16(float16_t a,float16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmulxh_laneq_f16",
      "full name": "float16_t vmulxh_laneq_f16(float16_t a,float16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "浮点乘法扩展。此指令将两个源SIMD&FP寄存器向量中相应的浮点值相乘，将所得浮点值放在向量中，并将向量写入目标SIMD&FP寄存器。"
    },
    {
      "name": "vmaxv_f16",
      "full name": "float16_t vmaxv_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vmaxvq_f16",
      "full name": "float16_t vmaxvq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vminv_f16",
      "full name": "float16_t vminv_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vminvq_f16",
      "full name": "float16_t vminvq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，从连接的向量中读取每对相邻的向量元素，将每对值中较大的值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vmaxnmv_f16",
      "full name": "float16_t vmaxnmv_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，将每对值中的最大值写入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vmaxnmvq_f16",
      "full name": "float16_t vmaxnmvq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最大数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，将每对值中的最大值写入一个向量，并将该向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vminnmv_f16",
      "full name": "float16_t vminnmv_f16(float16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，写入每对浮点中的最小元素点值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vminnmvq_f16",
      "full name": "float16_t vminnmvq_f16(float16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.",
      "function_cn": "浮点最小数成对（向量）。该指令通过在第二个源 SIMD&FP 寄存器的向量元素之后连接第一个源 SIMD&FP 寄存器的向量元素来创建一个向量，读取两个源 SIMD&FP 寄存器中的每对相邻向量元素，写入每对浮点中的最小元素点值写入向量，并将向量写入目标 SIMD&FP 寄存器。该指令中的所有值都是浮点值。"
    },
    {
      "name": "vbsl_f16",
      "full name": "float16x4_t vbsl_f16(uint16x4_t a,float16x4_t b,float16x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.",
      "function_cn": "按位选择。当原始目标位为1时，此指令将目标SIMD&FP寄存器中的每个位设置为第一个源SIMD&FP寄存器的相应位，否则设置为第二个源SIMD&FP寄存器的相应位。"
    },
    {
      "name": "vbslq_f16",
      "full name": "float16x8_t vbslq_f16(uint16x8_t a,float16x8_t b,float16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.",
      "function_cn": "按位选择。当原始目标位为1时，此指令将目标SIMD&FP寄存器中的每个位设置为第一个源SIMD&FP寄存器的相应位，否则设置为第二个源SIMD&FP寄存器的相应位。"
    },
    {
      "name": "vzip_f16",
      "full name": "float16x4x2_t vzip_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vzipq_f16",
      "full name": "float16x8x2_t vzipq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vuzp_f16",
      "full name": "float16x4x2_t vuzp_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "解压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器中读取对应的奇数向量元素，将第一个源寄存器的结果放入向量下半部分的连续元素中，将第二个源寄存器的结果放入上半部分的连续元素中向量，并将向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vuzpq_f16",
      "full name": "float16x8x2_t vuzpq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "解压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器中读取对应的奇数向量元素，将第一个源寄存器的结果放入向量下半部分的连续元素中，将第二个源寄存器的结果放入上半部分的连续元素中向量，并将向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vtrn_f16",
      "full name": "float16x4x2_t vtrn_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose elements",
      "function_cn": "转置元素"
    },
    {
      "name": "vtrnq_f16",
      "full name": "float16x8x2_t vtrnq_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose elements",
      "function_cn": "转置元素"
    },
    {
      "name": "vmov_n_f16",
      "full name": "float16x4_t vmov_n_f16(float16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vmovq_n_f16",
      "full name": "float16x8_t vmovq_n_f16(float16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdup_n_f16",
      "full name": "float16x4_t vdup_n_f16(float16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdupq_n_f16",
      "full name": "float16x8_t vdupq_n_f16(float16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdup_lane_f16",
      "full name": "float16x4_t vdup_lane_f16(float16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vdupq_lane_f16",
      "full name": "float16x8_t vdupq_lane_f16(float16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vext_f16",
      "full name": "float16x4_t vext_f16(float16x4_t a,float16x4_t b,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.",
      "function_cn": "从向量对中提取向量。该指令从第二个源 SIMD&FP 寄存器中提取最低向量元素，从第一个源 SIMD&FP 寄存器中提取最高向量元素，将结果连接成一个向量，并将该向量写入目标 SIMD&FP 寄存器向量。索引值指定要从第一个源寄存器中提取的最低向量元素，然后从第一个源寄存器中提取连续元素，然后是第二个源寄存器，直到填满目标向量。 "
    },
    {
      "name": "vextq_f16",
      "full name": "float16x8_t vextq_f16(float16x8_t a,float16x8_t b,const int n)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.",
      "function_cn": "从向量对中提取向量。该指令从第二个源 SIMD&FP 寄存器中提取最低向量元素，从第一个源 SIMD&FP 寄存器中提取最高向量元素，将结果连接成一个向量，并将该向量写入目标 SIMD&FP 寄存器向量。索引值指定要从第一个源寄存器中提取的最低向量元素，然后从第一个源寄存器中提取连续元素，然后是第二个源寄存器，直到填满目标向量。 "
    },
    {
      "name": "vrev64_f16",
      "full name": "float16x4_t vrev64_f16(float16x4_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": " 反转 64 位双字（向量）中的元素。该指令反转源 SIMD&FP 寄存器中向量的每个双字中 8 位、16 位或 32 位元素的顺序，将结果放入一个向量中，并将该向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vrev64q_f16",
      "full name": "float16x8_t vrev64q_f16(float16x8_t vec)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": " 反转 64 位双字（向量）中的元素。该指令反转源 SIMD&FP 寄存器中向量的每个双字中 8 位、16 位或 32 位元素的顺序，将结果放入一个向量中，并将该向量写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vzip1_f16",
      "full name": "float16x4_t vzip1_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（主要）。该指令从两个源 SIMD&FP 寄存器的下半部分读取相邻的向量元素作为对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vzip1q_f16",
      "full name": "float16x8_t vzip1q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（主要）。该指令从两个源 SIMD&FP 寄存器的下半部分读取相邻的向量元素作为对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vzip2_f16",
      "full name": "float16x4_t vzip2_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vzip2q_f16",
      "full name": "float16x8_t vzip2q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vuzp1_f16",
      "full name": "float16x4_t vuzp1_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "解压缩向量（主要）。该指令从两个源 SIMD&FP 寄存器中读取对应的偶数向量元素，从零开始，将第一个源寄存器的结果放入向量下半部分的连续元素中，将第二个源寄存器的结果放入连续元素中在向量的上半部分，并将向量写入目标 SIMD&FP 寄存器。\n\n"
    },
    {
      "name": "vuzp1q_f16",
      "full name": "float16x8_t vuzp1q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "解压缩向量（主要）。该指令从两个源 SIMD&FP 寄存器中读取对应的偶数向量元素，从零开始，将第一个源寄存器的结果放入向量下半部分的连续元素中，将第二个源寄存器的结果放入连续元素中在向量的上半部分，并将向量写入目标 SIMD&FP 寄存器。\n\n"
    },
    {
      "name": "vuzp2_f16",
      "full name": "float16x4_t vuzp2_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vuzp2q_f16",
      "full name": "float16x8_t vuzp2q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "压缩向量（次要）。该指令从两个源 SIMD&FP 寄存器的上半部分读取相邻的向量元素成对，将这些对交错并将它们放入一个向量中，然后将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的第一对被放置到两个最低的向量元素中，随后的对从每个源寄存器中被交替取出。"
    },
    {
      "name": "vtrn1_f16",
      "full name": "float16x4_t vtrn1_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.",
      "function_cn": "转置向量（主要）。该指令从两个源 SIMD&FP 寄存器中读取对应的偶数向量元素，从零开始，将每个结果放入向量的连续元素中，并将向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的向量元素被放置到目标向量的偶数元素中，从零开始，而来自第二个源寄存器的向量元素被放置到目标向量的奇数元素中。 "
    },
    {
      "name": "vtrn1q_f16",
      "full name": "float16x8_t vtrn1q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.",
      "function_cn": "转置向量（主要）。该指令从两个源 SIMD&FP 寄存器中读取对应的偶数向量元素，从零开始，将每个结果放入向量的连续元素中，并将向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的向量元素被放置到目标向量的偶数元素中，从零开始，而来自第二个源寄存器的向量元素被放置到目标向量的奇数元素中。 "
    },
    {
      "name": "vtrn2_f16",
      "full name": "float16x4_t vtrn2_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.",
      "function_cn": " 转置向量（次要）。该指令从两个源 SIMD&FP 寄存器中读取对应的奇数向量元素，将每个结果放入向量的连续元素中，并将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的向量元素被放置到目标向量的偶数元素中，从零开始，而来自第二个源寄存器的向量元素被放置到目标向量的奇数元素中。"
    },
    {
      "name": "vtrn2q_f16",
      "full name": "float16x8_t vtrn2q_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.",
      "function_cn": " 转置向量（次要）。该指令从两个源 SIMD&FP 寄存器中读取对应的奇数向量元素，将每个结果放入向量的连续元素中，并将该向量写入目标 SIMD&FP 寄存器。来自第一个源寄存器的向量元素被放置到目标向量的偶数元素中，从零开始，而来自第二个源寄存器的向量元素被放置到目标向量的奇数元素中。"
    },
    {
      "name": "vdup_laneq_f16",
      "full name": "float16x4_t vdup_laneq_f16(float16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vdupq_laneq_f16",
      "full name": "float16x8_t vdupq_laneq_f16(float16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vduph_lane_f16",
      "full name": "float16_t vduph_lane_f16(float16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vduph_laneq_f16",
      "full name": "float16_t vduph_laneq_f16(float16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Set all vector lanes to the same value",
      "function_cn": " 将所有向量并行道设置为相同的值"
    },
    {
      "name": "vdot_u32",
      "full name": "uint32x2_t vdot_u32(uint32x2_t r,uint8x8_t a,uint8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdot_s32",
      "full name": "int32x2_t vdot_s32(int32x2_t r,int8x8_t a,int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vdotq_u32",
      "full name": "uint32x4_t vdotq_u32(uint32x4_t r,uint8x16_t a,uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdotq_s32",
      "full name": "int32x4_t vdotq_s32(int32x4_t r,int8x16_t a,int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vdot_lane_u32",
      "full name": "uint32x2_t vdot_lane_u32(uint32x2_t r,uint8x8_t a,uint8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdot_lane_s32",
      "full name": "int32x2_t vdot_lane_s32(int32x2_t r,int8x8_t a,int8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vdotq_laneq_u32",
      "full name": "uint32x4_t vdotq_laneq_u32(uint32x4_t r,uint8x16_t a,uint8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdotq_laneq_s32",
      "full name": "int32x4_t vdotq_laneq_s32(int32x4_t r,int8x16_t a,int8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vdot_laneq_u32",
      "full name": "uint32x2_t vdot_laneq_u32(uint32x2_t r,uint8x8_t a,uint8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdot_laneq_s32",
      "full name": "int32x2_t vdot_laneq_s32(int32x2_t r,int8x8_t a,int8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vdotq_lane_u32",
      "full name": "uint32x4_t vdotq_lane_u32(uint32x4_t r,uint8x16_t a,uint8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积无符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位元素与第二个源寄存器中相应 32 位元素的四个无符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。 "
    },
    {
      "name": "vdotq_lane_s32",
      "full name": "int32x4_t vdotq_lane_s32(int32x4_t r,int8x16_t a,int8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "点积有符号算术（向量）。此指令执行第一个源寄存器的每个 32 位元素中的四个有符号 8 位元素与第二个源寄存器中相应 32 位元素的四个有符号 8 位元素的点积，将结果累加到目标寄存器的相应 32 位元素。"
    },
    {
      "name": "vsha512hq_u64",
      "full name": "uint64x2_t vsha512hq_u64(uint64x2_t hash_ed,uint64x2_t hash_gf,uint64x2_t kwh_kwh2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SHA512 Hash update part 1 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma1 and chi functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.",
      "function_cn": "SHA512 哈希更新第 1 部分从三个 128 位源 SIMD&FP 寄存器中获取值，并生成一个 128 位输出值，该值结合了 SHA512 计算的两次迭代的 sigma1 和 chi 函数。它将此值返回到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vsha512h2q_u64",
      "full name": "uint64x2_t vsha512h2q_u64(uint64x2_t sum_ab,uint64x2_t hash_c_,uint64x2_t hash_ab)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SHA512 Hash update part 2 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma0 and majority functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.",
      "function_cn": "SHA512 哈希更新第 2 部分从三个 128 位源 SIMD&FP 寄存器中获取值，并生成一个 128 位输出值，该值结合了 SHA512 计算的两次迭代的 sigma0 和多数函数。它将此值返回到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vsha512su0q_u64",
      "full name": "uint64x2_t vsha512su0q_u64(uint64x2_t w0_1,uint64x2_t w2_)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SHA512 Schedule Update 0 takes the values from the two 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the gamma0 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.",
      "function_cn": " SHA512 调度更新 0 从两个 128 位源 SIMD&FP 寄存器中获取值，并产生一个 128 位输出值，该值结合了 SHA512 调度更新的两次迭代的 gamma0 函数，这些函数在块内的前 16 次迭代之后执行。它将此值返回到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vsha512su1q_u64",
      "full name": "uint64x2_t vsha512su1q_u64(uint64x2_t s01_s02,uint64x2_t w14_15,uint64x2_t w9_10)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SHA512 Schedule Update 1 takes the values from the three source SIMD&FP registers and produces a 128-bit output value that combines the gamma1 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.",
      "function_cn": " SHA512 调度更新 1 从三个源 SIMD&FP 寄存器中获取值并生成一个 128 位输出值，该值结合了 SHA512 调度更新的两次迭代的 gamma1 函数，这些函数在块内的前 16 次迭代之后执行。它将此值返回到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_u8",
      "full name": "uint8x16_t veor3q_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_u16",
      "full name": "uint16x8_t veor3q_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_u32",
      "full name": "uint32x4_t veor3q_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_u64",
      "full name": "uint64x2_t veor3q_u64(uint64x2_t a,uint64x2_t b,uint64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_s8",
      "full name": "int8x16_t veor3q_s8(int8x16_t a,int8x16_t b,int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_s16",
      "full name": "int16x8_t veor3q_s16(int16x8_t a,int16x8_t b,int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_s32",
      "full name": "int32x4_t veor3q_s32(int32x4_t a,int32x4_t b,int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "veor3q_s64",
      "full name": "int64x2_t veor3q_s64(int64x2_t a,int64x2_t b,int64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Three-way Exclusive OR performs a three-way exclusive OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.",
      "function_cn": "三路异或对三个源 SIMD&FP 寄存器中的值执行三路异或，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vrax1q_u64",
      "full name": "uint64x2_t vrax1q_u64(uint64x2_t a,uint64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Rotate and Exclusive OR rotates each 64-bit element of the 128-bit vector in a source SIMD&FP register left by 1, performs a bitwise exclusive OR of the resulting 128-bit vector and the vector in another source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "循环和异或将源 SIMD&FP 寄存器中的 128 位向量的每个 64 位元素左循环 1，对得到的 128 位向量和另一个源 SIMD&FP 寄存器中的向量执行按位异或，然后写入结果到目标 SIMD&FP 寄存器。 "
    },
    {
      "name": "vxarq_u64",
      "full name": "uint64x2_t vxarq_u64(uint64x2_t a,uint64x2_t b,const int imm6)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Exclusive OR and Rotate performs a bitwise exclusive OR of the 128-bit vectors in the two source SIMD&FP registers, rotates each 64-bit element of the resulting 128-bit vector right by the value specified by a 6-bit immediate value, and writes the result to the destination SIMD&FP register.",
      "function_cn": "异或和旋转对两个源 SIMD&FP 寄存器中的 128 位向量执行按位异或，将结果 128 位向量的每个 64 位元素向右旋转 6 位立即数指定的值，然后写入结果到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_u8",
      "full name": "uint8x16_t vbcaxq_u8(uint8x16_t a,uint8x16_t b,uint8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_u16",
      "full name": "uint16x8_t vbcaxq_u16(uint16x8_t a,uint16x8_t b,uint16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_u32",
      "full name": "uint32x4_t vbcaxq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_u64",
      "full name": "uint64x2_t vbcaxq_u64(uint64x2_t a,uint64x2_t b,uint64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_s8",
      "full name": "int8x16_t vbcaxq_s8(int8x16_t a,int8x16_t b,int8x16_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_s16",
      "full name": "int16x8_t vbcaxq_s16(int16x8_t a,int16x8_t b,int16x8_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_s32",
      "full name": "int32x4_t vbcaxq_s32(int32x4_t a,int32x4_t b,int32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vbcaxq_s64",
      "full name": "int64x2_t vbcaxq_s64(int64x2_t a,int64x2_t b,int64x2_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Bit Clear and Exclusive OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.",
      "function_cn": "位清除和异或对源 SIMD&FP 寄存器中的 128 位向量与另一个源 SIMD&FP 寄存器中的余向量执行按位运算，然后对结果向量和第三个源 SIMD&FP 寄存器中的向量执行按位异或运算，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vsm3ss1q_u32",
      "full name": "uint32x4_t vsm3ss1q_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3SS1 rotates the top 32 bits of the 128-bit vector in the first source SIMD&FP register by 12, and adds that 32-bit value to the two other 32-bit values held in the top 32 bits of each of the 128-bit vectors in the second and third source SIMD&FP registers, rotating this result left by 7 and writing the final result into the top 32 bits of the vector in the destination SIMD&FP register, with the bottom 96 bits of the vector being written to 0.",
      "function_cn": "SM3SS1 将第一个源 SIMD&FP 寄存器中 128 位向量的前 32 位循环 12，并将该 32 位值与保存在第二个和第三个源 SIMD&FP 寄存器中的每个 128 位向量的前 32 位中的另外两个 32 位值相加，将此结果向左循环 7，并将最终结果写入目标 SIMD&FP 寄存器中向量的前 32 位，向量的后 96 位写入 0。 "
    },
    {
      "name": "vsm3tt1aq_u32",
      "full name": "uint32x4_t vsm3tt1aq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c,const int imm2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3TT1A takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:",
      "function_cn": "SM3TT1A 从三个源 SIMD&FP 寄存器中获取三个 128 位向量和一个 2 位立即数索引值，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。它对保存在第一个源向量的上三个元素中的三个 32 位字段执行三向异或，并将结果 32 位值与以下三个其他 32 位值相加：\n\n "
    },
    {
      "name": "vsm3tt1bq_u32",
      "full name": "uint32x4_t vsm3tt1bq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c,const int imm2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3TT1B takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:",
      "function_cn": "SM3TT1B 从三个源 SIMD&FP 寄存器中获取三个 128 位向量和一个 2 位立即数索引值，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。它在第一个源向量的上三个元素中保存的三个 32 位字段之间执行 32 位多数函数，并将结果 32 位值和以下三个其他 32 位值相加： "
    },
    {
      "name": "vsm3tt2aq_u32",
      "full name": "uint32x4_t vsm3tt2aq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c,const int imm2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3TT2A takes three 128-bit vectors from three source SIMD&FP register and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:",
      "function_cn": "SM3TT2A 从三个源 SIMD&FP 寄存器中获取三个 128 位向量和一个 2 位立即数索引值，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。它对保存在第一个源向量的上三个元素中的三个 32 位字段执行三向异或，并将结果 32 位值与以下三个其他 32 位值相加："
    },
    {
      "name": "vsm3tt2bq_u32",
      "full name": "uint32x4_t vsm3tt2bq_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c,const int imm2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3TT2B takes three 128-bit vectors from three source SIMD&FP registers, and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:",
      "function_cn": "SM3TT2B 从三个源 SIMD&FP 寄存器中获取三个 128 位向量和一个 2 位立即数索引值，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。它在第一个源向量的上三个元素中保存的三个 32 位字段之间执行 32 位多数函数，并将结果 32 位值和以下三个其他 32 位值相加： "
    },
    {
      "name": "vsm3partw1q_u32",
      "full name": "uint32x4_t vsm3partw1q_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3PARTW1 takes three 128-bit vectors from the three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.",
      "function_cn": "SM3PARTW1 从三个源 SIMD&FP 寄存器中获取三个 128 位向量，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。结果是通过输入向量中元素具有一些固定旋转的三路异或获得的，有关更多信息，请参阅操作伪代码。"
    },
    {
      "name": "vsm3partw2q_u32",
      "full name": "uint32x4_t vsm3partw2q_u32(uint32x4_t a,uint32x4_t b,uint32x4_t c)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM3PARTW2 takes three 128-bit vectors from three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.",
      "function_cn": "SM3PARTW2 从三个源 SIMD&FP 寄存器中获取三个 128 位向量，并在目标 SIMD&FP 寄存器中返回一个 128 位结果。结果是通过输入向量中元素具有一些固定旋转的三路异或获得的，有关更多信息，请参阅操作伪代码。 "
    },
    {
      "name": "vsm4eq_u32",
      "full name": "uint32x4_t vsm4eq_u32(uint32x4_t a,uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM4 Encode takes input data as a 128-bit vector from the first source SIMD&FP register, and four iterations of the round key held as the elements of the 128-bit vector in the second source SIMD&FP register. It encrypts the data by four rounds, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.",
      "function_cn": "SM4 Encode 将输入数据作为来自第一个源 SIMD&FP 寄存器的 128 位向量，并将轮密钥的四次迭代作为第二个源 SIMD&FP 寄存器中的 128 位向量的元素。它按照SM4标准对数据进行四轮加密，将128位结果返回到目的SIMD&FP寄存器。"
    },
    {
      "name": "vsm4ekeyq_u32",
      "full name": "uint32x4_t vsm4ekeyq_u32(uint32x4_t a,uint32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "SM4 Key takes an input as a 128-bit vector from the first source SIMD&FP register and a 128-bit constant from the second SIMD&FP register. It derives four iterations of the output key, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.",
      "function_cn": "SM4 Key 将输入值作为来自第一个源 SIMD&FP 寄存器的 128 位向量和来自第二个 SIMD&FP 寄存器的 128 位常量。它根据 SM4 标准导出输出密钥的四次迭代，将 128 位结果返回到目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vfmlal_low_f16",
      "full name": "float32x2_t vfmlal_low_f16(float32x2_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlsl_low_f16",
      "full name": "float32x2_t vfmlsl_low_f16(float32x2_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlalq_low_f16",
      "full name": "float32x4_t vfmlalq_low_f16(float32x4_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlslq_low_f16",
      "full name": "float32x4_t vfmlslq_low_f16(float32x4_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlal_high_f16",
      "full name": "float32x2_t vfmlal_high_f16(float32x2_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlsl_high_f16",
      "full name": "float32x2_t vfmlsl_high_f16(float32x2_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlalq_high_f16",
      "full name": "float32x4_t vfmlalq_high_f16(float32x4_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlslq_high_f16",
      "full name": "float32x4_t vfmlslq_high_f16(float32x4_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlal_lane_low_f16",
      "full name": "float32x2_t vfmlal_lane_low_f16(float32x2_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlal_laneq_low_f16",
      "full name": "float32x2_t vfmlal_laneq_low_f16(float32x2_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlalq_lane_low_f16",
      "full name": "float32x4_t vfmlalq_lane_low_f16(float32x4_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlalq_laneq_low_f16",
      "full name": "float32x4_t vfmlalq_laneq_low_f16(float32x4_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlsl_lane_low_f16",
      "full name": "float32x2_t vfmlsl_lane_low_f16(float32x2_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlsl_laneq_low_f16",
      "full name": "float32x2_t vfmlsl_laneq_low_f16(float32x2_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlslq_lane_low_f16",
      "full name": "float32x4_t vfmlslq_lane_low_f16(float32x4_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlslq_laneq_low_f16",
      "full name": "float32x4_t vfmlslq_laneq_low_f16(float32x4_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlal_lane_high_f16",
      "full name": "float32x2_t vfmlal_lane_high_f16(float32x2_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlsl_lane_high_f16",
      "full name": "float32x2_t vfmlsl_lane_high_f16(float32x2_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlalq_lane_high_f16",
      "full name": "float32x4_t vfmlalq_lane_high_f16(float32x4_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlslq_lane_high_f16",
      "full name": "float32x4_t vfmlslq_lane_high_f16(float32x4_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlal_laneq_high_f16",
      "full name": "float32x2_t vfmlal_laneq_high_f16(float32x2_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlsl_laneq_high_f16",
      "full name": "float32x2_t vfmlsl_laneq_high_f16(float32x2_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vfmlalq_laneq_high_f16",
      "full name": "float32x4_t vfmlalq_laneq_high_f16(float32x4_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "浮点融合乘加Long到累加器（向量）。该指令将两个源 SIMD&FP 寄存器中向量中对应的半精度浮点值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。"
    },
    {
      "name": "vfmlslq_laneq_high_f16",
      "full name": "float32x4_t vfmlslq_laneq_high_f16(float32x4_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.",
      "function_cn": "从累加器（向量）中进行浮点融合乘减Long。该指令将一个 SIMD&FP 寄存器的向量中的值取反，将这些值与另一个向量中的相应值相乘，并将乘积累加到目标 SIMD&FP 寄存器的相应向量元素中。该指令不会在累加之前舍入乘法的结果。 "
    },
    {
      "name": "vcadd_rot90_f16",
      "full name": "float16x4_t vcadd_rot90_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcadd_rot90_f32",
      "full name": "float32x2_t vcadd_rot90_f32(float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot90_f16",
      "full name": "float16x8_t vcaddq_rot90_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot90_f32",
      "full name": "float32x4_t vcaddq_rot90_f32(float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot90_f64",
      "full name": "float64x2_t vcaddq_rot90_f64(float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcadd_rot270_f16",
      "full name": "float16x4_t vcadd_rot270_f16(float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcadd_rot270_f32",
      "full name": "float32x2_t vcadd_rot270_f32(float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot270_f16",
      "full name": "float16x8_t vcaddq_rot270_f16(float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot270_f32",
      "full name": "float32x4_t vcaddq_rot270_f32(float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcaddq_rot270_f64",
      "full name": "float64x2_t vcaddq_rot270_f64(float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Add.",
      "function_cn": "浮点复数相加。"
    },
    {
      "name": "vcmla_f16",
      "full name": "float16x4_t vcmla_f16(float16x4_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_f32",
      "full name": "float32x2_t vcmla_f32(float32x2_t r,float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_lane_f16",
      "full name": "float16x4_t vcmla_lane_f16(float16x4_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_lane_f32",
      "full name": "float32x2_t vcmla_lane_f32(float32x2_t r,float32x2_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_laneq_f16",
      "full name": "float16x4_t vcmla_laneq_f16(float16x4_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_laneq_f32",
      "full name": "float32x2_t vcmla_laneq_f32(float32x2_t r,float32x2_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_f16",
      "full name": "float16x8_t vcmlaq_f16(float16x8_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_f32",
      "full name": "float32x4_t vcmlaq_f32(float32x4_t r,float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_f64",
      "full name": "float64x2_t vcmlaq_f64(float64x2_t r,float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_lane_f16",
      "full name": "float16x8_t vcmlaq_lane_f16(float16x8_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_lane_f32",
      "full name": "float32x4_t vcmlaq_lane_f32(float32x4_t r,float32x4_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_laneq_f16",
      "full name": "float16x8_t vcmlaq_laneq_f16(float16x8_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_laneq_f32",
      "full name": "float32x4_t vcmlaq_laneq_f32(float32x4_t r,float32x4_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_f16",
      "full name": "float16x4_t vcmla_rot90_f16(float16x4_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_f32",
      "full name": "float32x2_t vcmla_rot90_f32(float32x2_t r,float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_lane_f16",
      "full name": "float16x4_t vcmla_rot90_lane_f16(float16x4_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_lane_f32",
      "full name": "float32x2_t vcmla_rot90_lane_f32(float32x2_t r,float32x2_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_laneq_f16",
      "full name": "float16x4_t vcmla_rot90_laneq_f16(float16x4_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot90_laneq_f32",
      "full name": "float32x2_t vcmla_rot90_laneq_f32(float32x2_t r,float32x2_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_f16",
      "full name": "float16x8_t vcmlaq_rot90_f16(float16x8_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_f32",
      "full name": "float32x4_t vcmlaq_rot90_f32(float32x4_t r,float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_f64",
      "full name": "float64x2_t vcmlaq_rot90_f64(float64x2_t r,float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_lane_f16",
      "full name": "float16x8_t vcmlaq_rot90_lane_f16(float16x8_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_lane_f32",
      "full name": "float32x4_t vcmlaq_rot90_lane_f32(float32x4_t r,float32x4_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_laneq_f16",
      "full name": "float16x8_t vcmlaq_rot90_laneq_f16(float16x8_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot90_laneq_f32",
      "full name": "float32x4_t vcmlaq_rot90_laneq_f32(float32x4_t r,float32x4_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_f16",
      "full name": "float16x4_t vcmla_rot180_f16(float16x4_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_f32",
      "full name": "float32x2_t vcmla_rot180_f32(float32x2_t r,float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_lane_f16",
      "full name": "float16x4_t vcmla_rot180_lane_f16(float16x4_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_lane_f32",
      "full name": "float32x2_t vcmla_rot180_lane_f32(float32x2_t r,float32x2_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_laneq_f16",
      "full name": "float16x4_t vcmla_rot180_laneq_f16(float16x4_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot180_laneq_f32",
      "full name": "float32x2_t vcmla_rot180_laneq_f32(float32x2_t r,float32x2_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_f16",
      "full name": "float16x8_t vcmlaq_rot180_f16(float16x8_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_f32",
      "full name": "float32x4_t vcmlaq_rot180_f32(float32x4_t r,float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_f64",
      "full name": "float64x2_t vcmlaq_rot180_f64(float64x2_t r,float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_lane_f16",
      "full name": "float16x8_t vcmlaq_rot180_lane_f16(float16x8_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_lane_f32",
      "full name": "float32x4_t vcmlaq_rot180_lane_f32(float32x4_t r,float32x4_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_laneq_f16",
      "full name": "float16x8_t vcmlaq_rot180_laneq_f16(float16x8_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot180_laneq_f32",
      "full name": "float32x4_t vcmlaq_rot180_laneq_f32(float32x4_t r,float32x4_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_f16",
      "full name": "float16x4_t vcmla_rot270_f16(float16x4_t r,float16x4_t a,float16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_f32",
      "full name": "float32x2_t vcmla_rot270_f32(float32x2_t r,float32x2_t a,float32x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_lane_f16",
      "full name": "float16x4_t vcmla_rot270_lane_f16(float16x4_t r,float16x4_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_lane_f32",
      "full name": "float32x2_t vcmla_rot270_lane_f32(float32x2_t r,float32x2_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_laneq_f16",
      "full name": "float16x4_t vcmla_rot270_laneq_f16(float16x4_t r,float16x4_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmla_rot270_laneq_f32",
      "full name": "float32x2_t vcmla_rot270_laneq_f32(float32x2_t r,float32x2_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_f16",
      "full name": "float16x8_t vcmlaq_rot270_f16(float16x8_t r,float16x8_t a,float16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_f32",
      "full name": "float32x4_t vcmlaq_rot270_f32(float32x4_t r,float32x4_t a,float32x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_f64",
      "full name": "float64x2_t vcmlaq_rot270_f64(float64x2_t r,float64x2_t a,float64x2_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_lane_f16",
      "full name": "float16x8_t vcmlaq_rot270_lane_f16(float16x8_t r,float16x8_t a,float16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_lane_f32",
      "full name": "float32x4_t vcmlaq_rot270_lane_f32(float32x4_t r,float32x4_t a,float32x2_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_laneq_f16",
      "full name": "float16x8_t vcmlaq_rot270_laneq_f16(float16x8_t r,float16x8_t a,float16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vcmlaq_rot270_laneq_f32",
      "full name": "float32x4_t vcmlaq_rot270_laneq_f32(float32x4_t r,float32x4_t a,float32x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Complex Multiply Accumulate.",
      "function_cn": "浮点复数乘法累加。"
    },
    {
      "name": "vrnd32z_f32",
      "full name": "float32x2_t vrnd32z_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 32 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd32zq_f32",
      "full name": "float32x4_t vrnd32zq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 32 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd32z_f64",
      "full name": "float64x1_t vrnd32z_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 32 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd32zq_f64",
      "full name": "float64x2_t vrnd32zq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 32 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd64z_f32",
      "full name": "float32x2_t vrnd64z_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 64 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd64zq_f32",
      "full name": "float32x4_t vrnd64zq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 64 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd64z_f64",
      "full name": "float64x1_t vrnd64z_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 64 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd64zq_f64",
      "full name": "float64x2_t vrnd64zq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入到 64 位整数向零（向量）。此指令使用向零舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vrnd32x_f32",
      "full name": "float32x2_t vrnd32x_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 32 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd32xq_f32",
      "full name": "float32x4_t vrnd32xq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 32 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd32x_f64",
      "full name": "float64x1_t vrnd32x_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 32 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd32xq_f64",
      "full name": "float64x2_t vrnd32xq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 32 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 32 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd64x_f32",
      "full name": "float32x2_t vrnd64x_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 64 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd64xq_f32",
      "full name": "float32x4_t vrnd64xq_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 64 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd64x_f64",
      "full name": "float64x1_t vrnd64x_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 64 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vrnd64xq_f64",
      "full name": "float64x2_t vrnd64xq_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.",
      "function_cn": "浮点数舍入为 64 位整数，使用当前舍入模式（向量）。此指令使用FPCR确定的舍入模式将 SIMD&FP 源寄存器中的浮点值向量舍入为适合 64 位整数大小的整数浮点值，并将结果写入 SIMD&FP 目标寄存器."
    },
    {
      "name": "vmmlaq_s32",
      "full name": "int32x4_t vmmlaq_s32(int32x4_t r,int8x16_t a,int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of signed 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.",
      "function_cn": "有符号 8 位整数矩阵乘法累加。此指令将第一个源向量中的有符号 8 位整数值的 2x8 矩阵乘以第二个源向量中的有符号 8 位整数值的 8x2 矩阵。生成的 2x2 32 位整数矩阵乘积被破坏性地添加到目标向量中的 32 位整数矩阵累加器。这相当于对每个目标元素执行 8 路点积。 "
    },
    {
      "name": "vmmlaq_u32",
      "full name": "uint32x4_t vmmlaq_u32(uint32x4_t r,uint8x16_t a,uint8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of unsigned 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.",
      "function_cn": "无符号 8 位整数矩阵乘法累加。此指令将第一个源向量中无符号 8 位整数值的 2x8 矩阵乘以第二个源向量中无符号 8 位整数值的 8x2 矩阵。生成的 2x2 32 位整数矩阵乘积被破坏性地添加到目标向量中的 32 位整数矩阵累加器。这相当于对每个目标元素执行 8 路点积。\n\n "
    },
    {
      "name": "vusmmlaq_s32",
      "full name": "int32x4_t vusmmlaq_s32(int32x4_t r,uint8x16_t a,int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Unsigned and signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.",
      "function_cn": "无符号和有符号 8 位整数矩阵乘法累加。此指令将第一个源向量中的 2x8 无符号 8 位整数值矩阵乘以第二个源向量中的有符号 8 位整数值的 8x2 矩阵。生成的 2x2 32 位整数矩阵乘积被破坏性地添加到目标向量中的 32 位整数矩阵累加器。这相当于对每个目标元素执行 8 路点积。"
    },
    {
      "name": "vusdot_s32",
      "full name": "int32x2_t vusdot_s32(int32x2_t r,uint8x8_t a,int8x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vusdot_lane_s32",
      "full name": "int32x2_t vusdot_lane_s32(int32x2_t r,uint8x8_t a,int8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vsudot_lane_s32",
      "full name": "int32x2_t vsudot_lane_s32(int32x2_t r,int8x8_t a,uint8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.",
      "function_cn": "带无符号和有符号整数的点积索引形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的索引 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。"
    },
    {
      "name": "vusdot_laneq_s32",
      "full name": "int32x2_t vusdot_laneq_s32(int32x2_t r,uint8x8_t a,int8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vsudot_laneq_s32",
      "full name": "int32x2_t vsudot_laneq_s32(int32x2_t r,int8x8_t a,uint8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.",
      "function_cn": "带无符号和有符号整数的点积索引形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的索引 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。"
    },
    {
      "name": "vusdotq_s32",
      "full name": "int32x4_t vusdotq_s32(int32x4_t r,uint8x16_t a,int8x16_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vusdotq_lane_s32",
      "full name": "int32x4_t vusdotq_lane_s32(int32x4_t r,uint8x16_t a,int8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vsudotq_lane_s32",
      "full name": "int32x4_t vsudotq_lane_s32(int32x4_t r,int8x16_t a,uint8x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.",
      "function_cn": "带无符号和有符号整数的点积索引形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的索引 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。"
    },
    {
      "name": "vusdotq_laneq_s32",
      "full name": "int32x4_t vusdotq_laneq_s32(int32x4_t r,uint8x16_t a,int8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.",
      "function_cn": "带无符号和有符号整数的点积向量形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的相应 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。 "
    },
    {
      "name": "vsudotq_laneq_s32",
      "full name": "int32x4_t vsudotq_laneq_s32(int32x4_t r,int8x16_t a,uint8x16_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.",
      "function_cn": "带无符号和有符号整数的点积索引形式。此指令执行第一个源寄存器的每个 32 位元素中的四个无符号 8 位整数值与第二个源寄存器的索引 32 位元素中的四个有符号 8 位整数值的点积，累加结果到目标寄存器的相应 32 位元素中。"
    },
    {
      "name": "vcreate_bf16",
      "full name": "bfloat16x4_t vcreate_bf16(uint64_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vdup_n_bf16",
      "full name": "bfloat16x4_t vdup_n_bf16(bfloat16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdupq_n_bf16",
      "full name": "bfloat16x8_t vdupq_n_bf16(bfloat16_t value)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdup_lane_bf16",
      "full name": "bfloat16x4_t vdup_lane_bf16(bfloat16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdupq_lane_bf16",
      "full name": "bfloat16x8_t vdupq_lane_bf16(bfloat16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdup_laneq_bf16",
      "full name": "bfloat16x4_t vdup_laneq_bf16(bfloat16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vdupq_laneq_bf16",
      "full name": "bfloat16x8_t vdupq_laneq_bf16(bfloat16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vcombine_bf16",
      "full name": "bfloat16x8_t vcombine_bf16(bfloat16x4_t low,bfloat16x4_t high)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vget_high_bf16",
      "full name": "bfloat16x4_t vget_high_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vget_low_bf16",
      "full name": "bfloat16x4_t vget_low_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vget_lane_bf16",
      "full name": "bfloat16_t vget_lane_bf16(bfloat16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vgetq_lane_bf16",
      "full name": "bfloat16_t vgetq_lane_bf16(bfloat16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vset_lane_bf16",
      "full name": "bfloat16x4_t vset_lane_bf16(bfloat16_t a,bfloat16x4_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vsetq_lane_bf16",
      "full name": "bfloat16x8_t vsetq_lane_bf16(bfloat16_t a,bfloat16x8_t v,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vduph_lane_bf16",
      "full name": "bfloat16_t vduph_lane_bf16(bfloat16x4_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vduph_laneq_bf16",
      "full name": "bfloat16_t vduph_laneq_bf16(bfloat16x8_t vec,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.",
      "function_cn": " 将向量元素复制到向量或标量。此指令将源 SIMD&FP 寄存器中指定元素索引处的向量元素复制为标量或向量中的每个元素，并将结果写入目标 SIMD&FP 寄存器。"
    },
    {
      "name": "vld1_bf16",
      "full name": "bfloat16x4_t vld1_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1q_bf16",
      "full name": "bfloat16x8_t vld1q_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1_lane_bf16",
      "full name": "bfloat16x4_t vld1_lane_bf16(bfloat16_t const * ptr,bfloat16x4_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.",
      "function_cn": "将一个单元素结构加载到一个寄存器的一个并行道。该指令从内存加载一个单元素结构，并将结果写入 SIMD&FP 寄存器的指定并行道，而不影响该寄存器的其他位。"
    },
    {
      "name": "vld1q_lane_bf16",
      "full name": "bfloat16x8_t vld1q_lane_bf16(bfloat16_t const * ptr,bfloat16x8_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.",
      "function_cn": "将一个单元素结构加载到一个寄存器的一个并行道。该指令从内存加载一个单元素结构，并将结果写入 SIMD&FP 寄存器的指定并行道，而不影响该寄存器的其他位。"
    },
    {
      "name": "vld1_dup_bf16",
      "full name": "bfloat16x4_t vld1_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.",
      "function_cn": "加载一个单元素结构并复制到所有并行道（一个寄存器的）。该指令从内存加载单元素结构并将该结构复制到 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld1q_dup_bf16",
      "full name": "bfloat16x8_t vld1q_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.",
      "function_cn": "加载一个单元素结构并复制到所有并行道（一个寄存器的）。该指令从内存加载单元素结构并将该结构复制到 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vst1_bf16",
      "full name": "void vst1_bf16(bfloat16_t * ptr,bfloat16x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1q_bf16",
      "full name": "void vst1q_bf16(bfloat16_t * ptr,bfloat16x8_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1_lane_bf16",
      "full name": "void vst1_lane_bf16(bfloat16_t * ptr,bfloat16x4_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.",
      "function_cn": "存储来自一个寄存器的一个并行道的单元素结构。该指令将 SIMD&FP 寄存器的指定元素存储到内存中。"
    },
    {
      "name": "vst1q_lane_bf16",
      "full name": "void vst1q_lane_bf16(bfloat16_t * ptr,bfloat16x8_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.",
      "function_cn": "存储来自一个寄存器的一个并行道的单元素结构。该指令将 SIMD&FP 寄存器的指定元素存储到内存中。"
    },
    {
      "name": "vld2_bf16",
      "full name": "bfloat16x4x2_t vld2_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 2 元素结构加载到两个寄存器。该指令从内存中加载多个 2 元素结构并将结果写入两个 SIMD&FP 寄存器，并进行去交错。"
    },
    {
      "name": "vld2q_bf16",
      "full name": "bfloat16x8x2_t vld2q_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 2 元素结构加载到两个寄存器。该指令从内存中加载多个 2 元素结构并将结果写入两个 SIMD&FP 寄存器，并进行去交错。"
    },
    {
      "name": "vld3_bf16",
      "full name": "bfloat16x4x3_t vld3_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 3-element structures to three registers. This instruction loads multiple 3-element structures from memory and writes the result to the three SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 3 元素结构加载到三个寄存器。该指令从内存中加载多个 3 元素结构并将结果写入三个 SIMD&FP 寄存器，并进行去交错。"
    },
    {
      "name": "vld3q_bf16",
      "full name": "bfloat16x8x3_t vld3q_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 3-element structures to three registers. This instruction loads multiple 3-element structures from memory and writes the result to the three SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 3 元素结构加载到三个寄存器。该指令从内存中加载多个 3 元素结构并将结果写入三个 SIMD&FP 寄存器，并进行去交错。"
    },
    {
      "name": "vld4_bf16",
      "full name": "bfloat16x4x4_t vld4_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 4 元素结构加载到四个寄存器。该指令从内存中加载多个 4 元素结构并将结果写入四个 SIMD&FP 寄存器，并进行去交错。 "
    },
    {
      "name": "vld4q_bf16",
      "full name": "bfloat16x8x4_t vld4q_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&FP registers, with de-interleaving.",
      "function_cn": "将多个 4 元素结构加载到四个寄存器。该指令从内存中加载多个 4 元素结构并将结果写入四个 SIMD&FP 寄存器，并进行去交错。 "
    },
    {
      "name": "vld2_dup_bf16",
      "full name": "bfloat16x4x2_t vld2_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.",
      "function_cn": "加载单个 2 元素结构并复制到两个寄存器的所有并行道。该指令从内存加载一个 2 元素结构并将该结构复制到两个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld2q_dup_bf16",
      "full name": "bfloat16x8x2_t vld2q_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.",
      "function_cn": "加载单个 2 元素结构并复制到两个寄存器的所有并行道。该指令从内存加载一个 2 元素结构并将该结构复制到两个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld3_dup_bf16",
      "full name": "bfloat16x4x3_t vld3_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.",
      "function_cn": "加载单个 3 元素结构并复制到三个寄存器的所有并行道。该指令从内存加载一个 3 元素结构并将该结构复制到三个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld3q_dup_bf16",
      "full name": "bfloat16x8x3_t vld3q_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.",
      "function_cn": "加载单个 3 元素结构并复制到三个寄存器的所有并行道。该指令从内存加载一个 3 元素结构并将该结构复制到三个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld4_dup_bf16",
      "full name": "bfloat16x4x4_t vld4_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.",
      "function_cn": "加载单个 4 元素结构并复制到四个寄存器的所有并行道。该指令从内存加载一个 4 元素结构并将该结构复制到四个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vld4q_dup_bf16",
      "full name": "bfloat16x8x4_t vld4q_dup_bf16(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.",
      "function_cn": "加载单个 4 元素结构并复制到四个寄存器的所有并行道。该指令从内存加载一个 4 元素结构并将该结构复制到四个 SIMD&FP 寄存器的所有并行道。"
    },
    {
      "name": "vst2_bf16",
      "full name": "void vst2_bf16(bfloat16_t * ptr,bfloat16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&FP registers to memory, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自两个寄存器的多个 2 元素结构。该指令将两个 SIMD&FP 寄存器中的多个 2 元素结构存储到内存中，并进行交错。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vst2q_bf16",
      "full name": "void vst2q_bf16(bfloat16_t * ptr,bfloat16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&FP registers to memory, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自两个寄存器的多个 2 元素结构。该指令将两个 SIMD&FP 寄存器中的多个 2 元素结构存储到内存中，并进行交错。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vst3_bf16",
      "full name": "void vst3_bf16(bfloat16_t * ptr,bfloat16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&FP registers, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自三个寄存器的多个 3 元素结构。该指令将三个 SIMD&FP 寄存器中的多个 3 元素结构以交错方式存储到内存中。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vst3q_bf16",
      "full name": "void vst3q_bf16(bfloat16_t * ptr,bfloat16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&FP registers, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自三个寄存器的多个 3 元素结构。该指令将三个 SIMD&FP 寄存器中的多个 3 元素结构以交错方式存储到内存中。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vst4_bf16",
      "full name": "void vst4_bf16(bfloat16_t * ptr,bfloat16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&FP registers, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自四个寄存器的多个 4 元素结构。该指令将四个 SIMD&FP 寄存器中的多个 4 元素结构以交错方式存储到内存中。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vst4q_bf16",
      "full name": "void vst4q_bf16(bfloat16_t * ptr,bfloat16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&FP registers, with interleaving. Every element of each register is stored.",
      "function_cn": "存储来自四个寄存器的多个 4 元素结构。该指令将四个 SIMD&FP 寄存器中的多个 4 元素结构以交错方式存储到内存中。每个寄存器的每个元素都被存储。"
    },
    {
      "name": "vld2_lane_bf16",
      "full name": "bfloat16x4x2_t vld2_lane_bf16(bfloat16_t const * ptr,bfloat16x4x2_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 2 元素结构加载到两个寄存器的一个并行道。该指令从内存加载一个 2 元素结构，并将结果写入两个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vld2q_lane_bf16",
      "full name": "bfloat16x8x2_t vld2q_lane_bf16(bfloat16_t const * ptr,bfloat16x8x2_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 2 元素结构加载到两个寄存器的一个并行道。该指令从内存加载一个 2 元素结构，并将结果写入两个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vld3_lane_bf16",
      "full name": "bfloat16x4x3_t vld3_lane_bf16(bfloat16_t const * ptr,bfloat16x4x3_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 3 元素结构加载到三个寄存器的一个并行道。该指令从内存加载一个 3 元素结构，并将结果写入三个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vld3q_lane_bf16",
      "full name": "bfloat16x8x3_t vld3q_lane_bf16(bfloat16_t const * ptr,bfloat16x8x3_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 3 元素结构加载到三个寄存器的一个并行道。该指令从内存加载一个 3 元素结构，并将结果写入三个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vld4_lane_bf16",
      "full name": "bfloat16x4x4_t vld4_lane_bf16(bfloat16_t const * ptr,bfloat16x4x4_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 4 元素结构加载到四个寄存器的一个并行道。该指令从内存加载一个 4 元素结构，并将结果写入四个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vld4q_lane_bf16",
      "full name": "bfloat16x8x4_t vld4q_lane_bf16(bfloat16_t const * ptr,bfloat16x8x4_t src,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.",
      "function_cn": "将单个 4 元素结构加载到四个寄存器的一个并行道。该指令从内存加载一个 4 元素结构，并将结果写入四个 SIMD&FP 寄存器的相应元素，而不影响这些寄存器的其他位。"
    },
    {
      "name": "vst2_lane_bf16",
      "full name": "void vst2_lane_bf16(bfloat16_t * ptr,bfloat16x4x2_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.",
      "function_cn": "存储来自两个寄存器的一个并行道的单个 2 元素结构。该指令将两个 SIMD&FP 寄存器的相应元素组成的一个 2 元素结构存储到内存中。"
    },
    {
      "name": "vst2q_lane_bf16",
      "full name": "void vst2q_lane_bf16(bfloat16_t * ptr,bfloat16x8x2_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.",
      "function_cn": "存储来自两个寄存器的一个并行道的单个 2 元素结构。该指令将两个 SIMD&FP 寄存器的相应元素组成的一个 2 元素结构存储到内存中。"
    },
    {
      "name": "vst3_lane_bf16",
      "full name": "void vst3_lane_bf16(bfloat16_t * ptr,bfloat16x4x3_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.",
      "function_cn": "存储来自三个寄存器的一个并行道的单个 3 元素结构。该指令将三个 SIMD&FP 寄存器的相应元素组成的一个 3 元素结构存储到内存中。"
    },
    {
      "name": "vst3q_lane_bf16",
      "full name": "void vst3q_lane_bf16(bfloat16_t * ptr,bfloat16x8x3_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.",
      "function_cn": "存储来自三个寄存器的一个并行道的单个 3 元素结构。该指令将三个 SIMD&FP 寄存器的相应元素组成的一个 3 元素结构存储到内存中。"
    },
    {
      "name": "vst4_lane_bf16",
      "full name": "void vst4_lane_bf16(bfloat16_t * ptr,bfloat16x4x4_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.",
      "function_cn": "存储来自四个寄存器的一个并行道的单个 4 元素结构。该指令将四个 SIMD&FP 寄存器的相应元素组成的一个 4 元素结构存储到内存中。"
    },
    {
      "name": "vst4q_lane_bf16",
      "full name": "void vst4q_lane_bf16(bfloat16_t * ptr,bfloat16x8x4_t val,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.",
      "function_cn": "存储来自四个寄存器的一个并行道的单个 4 元素结构。该指令将四个 SIMD&FP 寄存器的相应元素组成的一个 4 元素结构存储到内存中。"
    },
    {
      "name": "vst1_bf16_x2",
      "full name": "void vst1_bf16_x2(bfloat16_t * ptr,bfloat16x4x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1q_bf16_x2",
      "full name": "void vst1q_bf16_x2(bfloat16_t * ptr,bfloat16x8x2_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1_bf16_x3",
      "full name": "void vst1_bf16_x3(bfloat16_t * ptr,bfloat16x4x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1q_bf16_x3",
      "full name": "void vst1q_bf16_x3(bfloat16_t * ptr,bfloat16x8x3_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1_bf16_x4",
      "full name": "void vst1_bf16_x4(bfloat16_t * ptr,bfloat16x4x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vst1q_bf16_x4",
      "full name": "void vst1q_bf16_x4(bfloat16_t * ptr,bfloat16x8x4_t val)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.",
      "function_cn": "存储来自一个、两个、三个或四个寄存器的多个单元素结构。该指令将元素从一个、两个、三个或四个 SIMD&FP 寄存器存储到内存中，无需交错。每个寄存器的每个元素都被存储。 "
    },
    {
      "name": "vld1_bf16_x2",
      "full name": "bfloat16x4x2_t vld1_bf16_x2(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1q_bf16_x2",
      "full name": "bfloat16x8x2_t vld1q_bf16_x2(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1_bf16_x3",
      "full name": "bfloat16x4x3_t vld1_bf16_x3(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1q_bf16_x3",
      "full name": "bfloat16x8x3_t vld1q_bf16_x3(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1_bf16_x4",
      "full name": "bfloat16x4x4_t vld1_bf16_x4(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vld1q_bf16_x4",
      "full name": "bfloat16x8x4_t vld1q_bf16_x4(bfloat16_t const * ptr)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.",
      "function_cn": "将多个单元素结构加载到一个、两个、三个或四个寄存器。该指令从内存加载多个单元素结构并将结果写入一个、两个、三个或四个 SIMD&FP 寄存器。 "
    },
    {
      "name": "vreinterpret_bf16_s8",
      "full name": "bfloat16x4_t vreinterpret_bf16_s8(int8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_s16",
      "full name": "bfloat16x4_t vreinterpret_bf16_s16(int16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_s32",
      "full name": "bfloat16x4_t vreinterpret_bf16_s32(int32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_f32",
      "full name": "bfloat16x4_t vreinterpret_bf16_f32(float32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_u8",
      "full name": "bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_u16",
      "full name": "bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_u32",
      "full name": "bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_p8",
      "full name": "bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_p16",
      "full name": "bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_u64",
      "full name": "bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_s64",
      "full name": "bfloat16x4_t vreinterpret_bf16_s64(int64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_s8",
      "full name": "bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_s16",
      "full name": "bfloat16x8_t vreinterpretq_bf16_s16(int16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_s32",
      "full name": "bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_f32",
      "full name": "bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_u8",
      "full name": "bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_u16",
      "full name": "bfloat16x8_t vreinterpretq_bf16_u16(uint16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_u32",
      "full name": "bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_p8",
      "full name": "bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_p16",
      "full name": "bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_u64",
      "full name": "bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_s64",
      "full name": "bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_f64",
      "full name": "bfloat16x4_t vreinterpret_bf16_f64(float64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_f64",
      "full name": "bfloat16x8_t vreinterpretq_bf16_f64(float64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_bf16_p64",
      "full name": "bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_p64",
      "full name": "bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_bf16_p128",
      "full name": "bfloat16x8_t vreinterpretq_bf16_p128(poly128_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_s8_bf16",
      "full name": "int8x8_t vreinterpret_s8_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_s16_bf16",
      "full name": "int16x4_t vreinterpret_s16_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_s32_bf16",
      "full name": "int32x2_t vreinterpret_s32_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_f32_bf16",
      "full name": "float32x2_t vreinterpret_f32_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_u8_bf16",
      "full name": "uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_u16_bf16",
      "full name": "uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_u32_bf16",
      "full name": "uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_p8_bf16",
      "full name": "poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_p16_bf16",
      "full name": "poly16x4_t vreinterpret_p16_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_u64_bf16",
      "full name": "uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_s64_bf16",
      "full name": "int64x1_t vreinterpret_s64_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_f64_bf16",
      "full name": "float64x1_t vreinterpret_f64_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpret_p64_bf16",
      "full name": "poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_s8_bf16",
      "full name": "int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_s16_bf16",
      "full name": "int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_s32_bf16",
      "full name": "int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_f32_bf16",
      "full name": "float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_u8_bf16",
      "full name": "uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_u16_bf16",
      "full name": "uint16x8_t vreinterpretq_u16_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_u32_bf16",
      "full name": "uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_p8_bf16",
      "full name": "poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_p16_bf16",
      "full name": "poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_u64_bf16",
      "full name": "uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_s64_bf16",
      "full name": "int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_f64_bf16",
      "full name": "float64x2_t vreinterpretq_f64_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_p64_bf16",
      "full name": "poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vreinterpretq_p128_bf16",
      "full name": "poly128_t vreinterpretq_p128_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Vector reinterpret cast operation",
      "function_cn": "向量重新解释转换操作"
    },
    {
      "name": "vcvt_f32_bf16",
      "full name": "float32x4_t vcvt_f32_bf16(bfloat16x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.",
      "function_cn": "Shift Left Long（按元素大小）。此指令读取源 SIMD&FP 寄存器的下半部分或上半部分中的每个向量元素，将每个结果左移元素大小，将最终结果写入向量，并将向量写入目标 SIMD&FP 寄存器。目标向量元素的长度是源向量元素的两倍。 "
    },
    {
      "name": "vcvtq_low_f32_bf16",
      "full name": "float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.",
      "function_cn": "Shift Left Long（按元素大小）。此指令读取源 SIMD&FP 寄存器的下半部分或上半部分中的每个向量元素，将每个结果左移元素大小，将最终结果写入向量，并将向量写入目标 SIMD&FP 寄存器。目标向量元素的长度是源向量元素的两倍。 "
    },
    {
      "name": "vcvtq_high_f32_bf16",
      "full name": "float32x4_t vcvtq_high_f32_bf16(bfloat16x8_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.",
      "function_cn": "Shift Left Long（按元素大小）。此指令读取源 SIMD&FP 寄存器的下半部分或上半部分中的每个向量元素，将每个结果左移元素大小，将最终结果写入向量，并将向量写入目标 SIMD&FP 寄存器。目标向量元素的长度是源向量元素的两倍。 "
    },
    {
      "name": "vcvt_bf16_f32",
      "full name": "bfloat16x4_t vcvt_bf16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point convert from single-precision to BFloat16 format (vector) reads each single-precision element in the SIMD&FP source vector, converts each value to BFloat16 format, and writes the results in the lower or upper half of the SIMD&FP destination vector. The result elements are half the width of the source elements.",
      "function_cn": "从单精度到 BFloat16 格式的浮点转换（向量）读取 SIMD&FP 源向量中的每个单精度元素，将每个值转换为 BFloat16 格式，并将结果写入 SIMD&FP 目标向量的下半部分或上半部分。结果元素是源元素宽度的一半。 "
    },
    {
      "name": "vcvtq_low_bf16_f32",
      "full name": "bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point convert from single-precision to BFloat16 format (vector) reads each single-precision element in the SIMD&FP source vector, converts each value to BFloat16 format, and writes the results in the lower or upper half of the SIMD&FP destination vector. The result elements are half the width of the source elements.",
      "function_cn": "从单精度到 BFloat16 格式的浮点转换（向量）读取 SIMD&FP 源向量中的每个单精度元素，将每个值转换为 BFloat16 格式，并将结果写入 SIMD&FP 目标向量的下半部分或上半部分。结果元素是源元素宽度的一半。 "
    },
    {
      "name": "vcvtq_high_bf16_f32",
      "full name": "bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t inactive,float32x4_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point convert from single-precision to BFloat16 format (vector) reads each single-precision element in the SIMD&FP source vector, converts each value to BFloat16 format, and writes the results in the lower or upper half of the SIMD&FP destination vector. The result elements are half the width of the source elements.",
      "function_cn": "从单精度到 BFloat16 格式的浮点转换（向量）读取 SIMD&FP 源向量中的每个单精度元素，将每个值转换为 BFloat16 格式，并将结果写入 SIMD&FP 目标向量的下半部分或上半部分。结果元素是源元素宽度的一半。 "
    },
    {
      "name": "vcvth_bf16_f32",
      "full name": "bfloat16_t vcvth_bf16_f32(float32_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Floating-point convert from single-precision to BFloat16 format (scalar) converts the single-precision floating-point value in the 32-bit SIMD&FP source register to BFloat16 format and writes the result in the 16-bit SIMD&FP destination register.",
      "function_cn": "从单精度到BFloat16 格式的浮点转换（标量）将32 位SIMD&FP 源寄存器中的单精度浮点值转换为BFloat16 格式，并将结果写入16 位SIMD&FP 目标寄存器。 "
    },
    {
      "name": "vcvtah_f32_bf16",
      "full name": "float32_t vcvtah_f32_bf16(bfloat16_t a)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.",
      "function_cn": "左移（立即）。该指令从向量中读取每个值，将每个结果左移一个立即值，将最终结果写入向量，然后将向量写入目标 SIMD&FP 寄存器。 "
    },
    {
      "name": "vcopy_lane_bf16",
      "full name": "bfloat16x4_t vcopy_lane_bf16(bfloat16x4_t a,const int lane1,bfloat16x4_t b,const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vcopyq_lane_bf16",
      "full name": "bfloat16x8_t vcopyq_lane_bf16(bfloat16x8_t a,const int lane1,bfloat16x4_t b,const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vcopy_laneq_bf16",
      "full name": "bfloat16x4_t vcopy_laneq_bf16(bfloat16x4_t a,const int lane1,bfloat16x8_t b,const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vcopyq_laneq_bf16",
      "full name": "bfloat16x8_t vcopyq_laneq_bf16(bfloat16x8_t a,const int lane1,bfloat16x8_t b,const int lane2)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.",
      "function_cn": "从另一个向量元素插入向量元素。该指令将源 SIMD&FP 寄存器的向量元素复制到目标 SIMD&FP 寄存器的指定向量元素。 "
    },
    {
      "name": "vbfdot_f32",
      "full name": "float32x2_t vbfdot_f32(float32x2_t r,bfloat16x4_t a,bfloat16x4_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfdotq_f32",
      "full name": "float32x4_t vbfdotq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfdot_lane_f32",
      "full name": "float32x2_t vbfdot_lane_f32(float32x2_t r,bfloat16x4_t a,bfloat16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfdotq_laneq_f32",
      "full name": "float32x4_t vbfdotq_laneq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfdot_laneq_f32",
      "full name": "float32x2_t vbfdot_laneq_f32(float32x2_t r,bfloat16x4_t a,bfloat16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfdotq_lane_f32",
      "full name": "float32x4_t vbfdotq_lane_f32(float32x4_t r,bfloat16x8_t a,bfloat16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of 16-bit BF16 elements. Within each pair, the elements in the first source vector are multiplied by the corresponding elements in the second source vector. The resulting single-precision products are then summed and added destructively to the single-precision element of the destination vector that aligns with the pair of BF16 values in the first source vector. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点点积（向量）。此指令将源向量分隔为成对的 16 位 BF16 元素。在每一对中，第一个源向量中的元素乘以第二个源向量中的相应元素。然后将得到的单精度乘积相加并破坏性地添加到目标向量的单精度元素中，该元素与第一个源向量中的 BF16 值对对齐。该指令忽略FPCR并且不更新FPSR异常状态。 "
    },
    {
      "name": "vbfmmlaq_f32",
      "full name": "float32x4_t vbfmmlaq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point matrix multiply-accumulate into 2x2 matrix. This instruction multiplies the 2x4 matrix of BF16 values held in the first 128-bit source vector by the 4x2 BF16 matrix in the second 128-bit source vector. The resulting 2x2 single-precision matrix product is then added destructively to the 2x2 single-precision matrix in the 128-bit destination vector. This is equivalent to performing a 4-way dot product per destination element. The instruction ignores the FPCR and does not update the FPSR exception status.",
      "function_cn": "BFloat16 浮点矩阵乘法累加成 2x2 矩阵。此指令将第一个 128 位源向量中保存的 BF16 值的 2x4 矩阵乘以第二个 128 位源向量中的 4x2 BF16 矩阵。然后将生成的 2x2 单精度矩阵乘积破坏性地添加到 128 位目标向量中的 2x2 单精度矩阵。这相当于对每个目标元素执行 4 路点积。该指令忽略 FPCR 并且不更新 FPSR 异常状态。"
    },
    {
      "name": "vbfmlalbq_f32",
      "full name": "float32x4_t vbfmlalbq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    },
    {
      "name": "vbfmlaltq_f32",
      "full name": "float32x4_t vbfmlaltq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    },
    {
      "name": "vbfmlalbq_lane_f32",
      "full name": "float32x4_t vbfmlalbq_lane_f32(float32x4_t r,bfloat16x8_t a,bfloat16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    },
    {
      "name": "vbfmlalbq_laneq_f32",
      "full name": "float32x4_t vbfmlalbq_laneq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    },
    {
      "name": "vbfmlaltq_lane_f32",
      "full name": "float32x4_t vbfmlaltq_lane_f32(float32x4_t r,bfloat16x8_t a,bfloat16x4_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    },
    {
      "name": "vbfmlaltq_laneq_f32",
      "full name": "float32x4_t vbfmlaltq_laneq_f32(float32x4_t r,bfloat16x8_t a,bfloat16x8_t b,const int lane)",
      "Intel name": "",
      "Intel Asm": "",
      "Arm Asm": "",
      "function_en": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from Bfloat16 to single-precision format. The instruction then multiplies and adds these values to the overlapping single-precision elements of the destination vector.",
      "function_cn": "BFloat16 浮点扩展长乘加（向量）将第一个和第二个源向量中的偶数（底部）或奇数（顶部）16 位元素从 Bfloat16 扩展为单精度格式。然后该指令将这些值相乘并添加到目标向量的重叠单精度元素。"
    }
  ],
  "BuiltIn": [
    {
      "name": "__builtin_aarch64_get_fpcr",
      "full name": "unsigned int __builtin_aarch64_get_fpcr ()",
      "function_en": "Obtains the contents of the FPCR register and views related parameter settings.",
      "function_cn": "获取FPCR寄存器内容，查看相关参数设置",
      "support_version": "5.5 ~ 9.3"
    },
    {
      "name": "__builtin_aarch64_set_fpcr",
      "full name": "void __builtin_aarch64_set_fpcr (unsigned int)",
      "function_en": "Sets the contents of the FPCR register.",
      "function_cn": "设置FPCR寄存器内容",
      "support_version": "5.5 ~ 9.3"
    },
    {
      "name": "__builtin_aarch64_get_fpsr",
      "full name": "unsigned int __builtin_aarch64_get_fpsr ()",
      "function_en": "Obtains the contents of the FPSR register and views related parameter settings.",
      "function_cn": "获取FPSR寄存器内容，查看相关参数设置",
      "support_version": "5.5 ~ 9.3"
    },
    {
      "name": "__builtin_aarch64_set_fpsr",
      "full name": "void __builtin_aarch64_set_fpsr (unsigned int)",
      "function_en": "Sets the contents of the FPSR register.",
      "function_cn": "设置FPSR寄存器内容",
      "support_version": "5.5 ~ 9.3"
    }
  ],
  "gcc": [
    {
      "name": "gcc4.8.5",
      "version": "4.8.5",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc4.8.5"
    },
    {
      "name": "gcc4.9.3",
      "version": "4.9.3",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-4.9.3/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc4.9.3"
    },
    {
      "name": "gcc5.1.0",
      "version": "5.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-5.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc5.1.0"
    },
    {
      "name": "gcc5.2.0",
      "version": "5.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc5.2.0"
    },
    {
      "name": "gcc5.3.0",
      "version": "5.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-5.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc5.3.0"
    },
    {
      "name": "gcc5.4.0",
      "version": "5.4.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-5.4.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc5.4.0"
    },
    {
      "name": "gcc5.5.0",
      "version": "5.5.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc5.5.0"
    },
    {
      "name": "gcc6.1.0",
      "version": "6.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc6.1.0"
    },
    {
      "name": "gcc6.2.0",
      "version": "6.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-6.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc6.2.0"
    },
    {
      "name": "gcc6.3.0",
      "version": "6.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-6.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc6.3.0"
    },
    {
      "name": "gcc6.4.0",
      "version": "6.4.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-6.4.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc6.4.0"
    },
    {
      "name": "gcc6.5.0",
      "version": "6.5.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-6.5.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc6.5.0"
    },
    {
      "name": "gcc7.1.0",
      "version": "7.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-7.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc7.1.0"
    },
    {
      "name": "gcc7.2.0",
      "version": "7.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-7.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc7.2.0"
    },
    {
      "name": "gcc7.3.0",
      "version": "7.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-7.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc7.3.0"
    },
    {
      "name": "gcc7.4.0",
      "version": "7.4.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-7.4.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc7.4.0"
    },
    {
      "name": "gcc8.1.0",
      "version": "8.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-8.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc8.1.0"
    },
    {
      "name": "gcc8.2.0",
      "version": "8.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-8.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc8.2.0"
    },
    {
      "name": "gcc8.3.0",
      "version": "8.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc8.3.0"
    },
    {
      "name": "gcc9.1.0",
      "version": "9.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc9.1.0"
    },
    {
      "name": "gcc9.2.0",
      "version": "9.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-9.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc9.2.0"
    },
    {
      "name": "gcc9.3.0",
      "version": "9.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-9.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc9.3.0"
    },
    {
      "name": "gcc10.1.0",
      "version": "10.1.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-10.1.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc10.1.0"
    },
    {
      "name": "gcc10.2.0",
      "version": "10.2.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-10.2.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc10.2.0"
    },
    {
      "name": "gcc10.3.0",
      "version": "10.3.0",
      "help_url": "http://gcc.gnu.org/onlinedocs/gcc-10.3.0/gcc/AArch64-Options.html#AArch64-Options",
      "description": "AArch64 compilation option information of Gcc10.3.0"
    },
    {
      "name": "bisheng compiler2.1.0",
      "version": "2.1.0",
      "help_url": "https://www.hikunpeng.com/document/detail/zh/kunpengdevkithistory/bisheng/hist-bisheng/kunpengbisheng_06_0009_3.html",
      "description": "AArch64 compilation option information of BiSheng Compiler2.1.0"
    },
    {
      "name": "bisheng compiler2.3.0",
      "version": "2.3.0",
      "help_url": "https://www.hikunpeng.com/document/detail/zh/kunpengdevkithistory/bisheng/hist-bisheng/kunpengbisheng_06_0009_0.html",
      "description": "AArch64 compilation option information of BiSheng Compiler2.3.0"
    },
    {
      "name": "bisheng compiler2.4.0",
      "version": "2.4.0",
      "help_url": "https://www.hikunpeng.com/document/detail/zh/kunpengdevkithistory/bisheng/hist-bisheng/kunpengbisheng_06_0009.html",
      "description": "AArch64 compilation option information of BiSheng Compiler2.4.0"
    },
    {
      "name": "bisheng compiler2.5.0",
      "version": "2.5.0",
      "help_url": "https://www.hikunpeng.com/document/detail/zh/kunpengdevps/compiler/ug-bisheng/kunpengbisheng_06_0009.html",
      "description": "AArch64 compilation option information of BiSheng Compiler2.5.0"
    }
  ],
  "compileOption": [
    {
      "gcc_version": "4.8.5",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references will be handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-march",
      "range": [
        "armv8-a",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mcpu",
      "range": [
        "generic",
        "large",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.8.5",
      "name": "-mtune",
      "range": [
        "generic",
        "large"
      ],
      "description": "Specify the name of the processor to tune the performance for.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references will be handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a57.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "4.9.3",
      "name": "-mcpu",
      "range": [
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.3.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, 'small', 'large' code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.4.0",
      "name": "-mcpu",
      "range": [
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target architecture, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "5.5.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "crc",
        "crypto",
        "fp",
        "simd"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.1.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.2.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the 'tiny', 'small' or 'large' code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Omit or keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mtls-dialect",
      "range": [],
      "description": "Use TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mtls-dialect",
      "range": [],
      "description": "Use traditional TLS as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Enable or disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Enable or disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-1",
        "native"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.3.0",
      "name": "-mpc-relative-literal-loads",
      "range": [
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Enable PC relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.4.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Do not assume that unaligned memory references are handled by the system.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, uses one less step than otherwise, thus reducing latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "When calculating the reciprocal square root approximation, do not use one less step than otherwise, thus keeping the default latency and precision.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "exynos-m1",
        "qdf24xx",
        "thunderx",
        "xgene1",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "6.5.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.1.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.2.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the 'tiny', 'small' or 'large' code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS or TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets. Valid values are 12, 24, 32, 48. This option requires binutils 2.26 or newer.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.3.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "lse",
        "fp16"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads. With this option literal pools are accessed using a single instruction and emitted after each function.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "7.4.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.1.0",
      "name": "-msve-vector-bits",
      "range": [],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.2.0",
      "name": "-msve-vector-bits",
      "range": [],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-march",
      "range": [
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "native"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mcpu",
      "range": [
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "exynos-m1",
        "falkor",
        "qdf24xx",
        "saphira",
        "xgene1",
        "vulcan",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "8.3.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using a canary at the specified guard location.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Generate stack protection code using a canary read via the specified system register as the base register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Generate stack protection code using a canary read at the specified offset from the base register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.1.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using canary at guard.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Specify the system register to use as the base register for reading the stack protection canary.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Specify the offset from the base register at which the stack protection canary is read.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.2.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using canary at guard.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Specify the system register to use as the base register for reading the stack protection canary.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Specify the offset from the base register at which the stack protection canary is read.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "9.3.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model. The -mcmodel=large option is incompatible with -mabi=ilp32, -fpic and -fPIC.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using canary at guard.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Specify the system register to use as the base register for reading the stack protection canary.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Specify the offset from the base register at which the stack protection canary is read.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "armv8.6-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres",
        "sve2",
        "sve2-bitperm",
        "sve2-sm4",
        "sve2-aes",
        "sve2-sha3",
        "tme",
        "i8mm",
        "f32mm",
        "f64mm",
        "bf16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "cortex-a76ae",
        "cortex-a77",
        "cortex-a65",
        "cortex-a65ae",
        "cortex-a34",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "octeontx2",
        "octeontx2t98",
        "octeontx2t96",
        "octeontx2t93",
        "octeontx2f95",
        "octeontx2f95n",
        "octeontx2f95mm",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "thunderx3t110",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf+b-key]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-moutline-atomics",
      "range": [],
      "description": "Enable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.1.0",
      "name": "-mno-outline-atomics",
      "range": [],
      "description": "Disable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model. The -mcmodel=large option is incompatible with -mabi=ilp32, -fpic and -fPIC.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using canary at guard.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Specify the system register used as the base register for reading the stack protector canary (used with -mstack-protector-guard=sysreg).",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Specify the offset from the base register at which the stack protector canary is read (used with -mstack-protector-guard=sysreg).",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "armv8.6-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres",
        "sve2",
        "sve2-bitperm",
        "sve2-sm4",
        "sve2-aes",
        "sve2-sha3",
        "tme",
        "i8mm",
        "f32mm",
        "f64mm",
        "bf16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "cortex-a76ae",
        "cortex-a77",
        "cortex-a65",
        "cortex-a65ae",
        "cortex-a34",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "octeontx2",
        "octeontx2t98",
        "octeontx2t96",
        "octeontx2t93",
        "octeontx2f95",
        "octeontx2f95n",
        "octeontx2f95mm",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "thunderx3t110",
        "zeus",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf+b-key]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-moutline-atomics",
      "range": [],
      "description": "Enable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.2.0",
      "name": "-mno-outline-atomics",
      "range": [],
      "description": "Disable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mabi",
      "range": [
        "ilp32",
        "lp64"
      ],
      "description": "Generate code for the specified data model.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mbig-endian",
      "range": [],
      "description": "Generate big-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mgeneral-regs-only",
      "range": [],
      "description": "Generate code which uses only the general-purpose registers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mlittle-endian",
      "range": [],
      "description": "Generate little-endian code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mcmodel",
      "range": [
        "tiny",
        "small",
        "large"
      ],
      "description": "Generate code for the tiny, small, or large code model. The -mcmodel=large option is incompatible with -mabi=ilp32, -fpic and -fPIC.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mstrict-align",
      "range": [],
      "description": "Avoid generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-strict-align",
      "range": [],
      "description": "Allow generating memory accesses that may not be aligned on a natural object boundary as described in the architecture specification.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-momit-leaf-frame-pointer",
      "range": [],
      "description": "Omit the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-omit-leaf-frame-pointer",
      "range": [],
      "description": "Keep the frame pointer in leaf functions.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mstack-protector-guard",
      "range": [],
      "description": "Generate stack protection code using canary at guard.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mstack-protector-guard-reg",
      "range": [],
      "description": "Specify the system register used as the base register for reading the stack protector canary (used with -mstack-protector-guard=sysreg).",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mstack-protector-guard-offset",
      "range": [],
      "description": "Specify the offset from the base register at which the stack protector canary is read (used with -mstack-protector-guard=sysreg).",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mtls-dialect",
      "range": [
        "desc",
        "traditional"
      ],
      "description": "Use traditional TLS / TLS descriptors as the thread-local storage mechanism for dynamic accesses of TLS variables.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mtls-size",
      "range": [
        "12",
        "24",
        "32",
        "48"
      ],
      "description": "Specify bit size of immediate TLS offsets.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mfix-cortex-a53-835769",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-fix-cortex-a53-835769",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 835769.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mfix-cortex-a53-843419",
      "range": [],
      "description": "Enable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-fix-cortex-a53-843419",
      "range": [],
      "description": "Disable the workaround for the ARM Cortex-A53 erratum number 843419.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mlow-precision-recip-sqrt",
      "range": [],
      "description": "Enable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-low-precision-recip-sqrt",
      "range": [],
      "description": "Disable the reciprocal square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mlow-precision-sqrt",
      "range": [],
      "description": "Enable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-low-precision-sqrt",
      "range": [],
      "description": "Disable the square root approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mlow-precision-div",
      "range": [],
      "description": "Enable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-low-precision-div",
      "range": [],
      "description": "Disable the division approximation.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mtrack-speculation",
      "range": [],
      "description": "Enable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-track-speculation",
      "range": [],
      "description": "Disable generation of additional code to track speculative execution through conditional branches.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-march",
      "range": [
        "armv8-a",
        "armv8.1-a",
        "armv8.2-a",
        "armv8.3-a",
        "armv8.4-a",
        "armv8.5-a",
        "armv8.6-a",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres",
        "sve2",
        "sve2-bitperm",
        "sve2-sm4",
        "sve2-aes",
        "sve2-sha3",
        "tme",
        "i8mm",
        "f32mm",
        "f64mm",
        "bf16"
      ],
      "description": "Specify the name of the target architecture and, optionally, one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mtune",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "cortex-a76ae",
        "cortex-a77",
        "cortex-a65",
        "cortex-a65ae",
        "cortex-a34",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "neoverse-n2",
        "neoverse-v1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "octeontx2",
        "octeontx2t98",
        "octeontx2t96",
        "octeontx2t93",
        "octeontx2f95",
        "octeontx2f95n",
        "octeontx2f95mm",
        "a64fx",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "thunderx3t110",
        "zeus",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native"
      ],
      "description": "Specify the name of the target processor for which GCC should tune the performance of the code.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mcpu",
      "range": [
        "generic",
        "cortex-a35",
        "cortex-a53",
        "cortex-a55",
        "cortex-a57",
        "cortex-a72",
        "cortex-a73",
        "cortex-a75",
        "cortex-a76",
        "ares",
        "exynos-m1",
        "emag",
        "falkor",
        "neoverse-e1",
        "neoverse-n1",
        "qdf24xx",
        "saphira",
        "phecda",
        "xgene1",
        "vulcan",
        "octeontx",
        "octeontx81",
        "octeontx83",
        "thunderx",
        "thunderxt88",
        "thunderxt88p1",
        "thunderxt81",
        "tsv110",
        "thunderxt83",
        "thunderx2t99",
        "cortex-a57.cortex-a53",
        "cortex-a72.cortex-a53",
        "cortex-a73.cortex-a35",
        "cortex-a73.cortex-a53",
        "cortex-a75.cortex-a55",
        "cortex-a76.cortex-a55",
        "native",
        "crc",
        "crypto",
        "fp",
        "simd",
        "sve",
        "lse",
        "rdma",
        "fp16",
        "fp16fml",
        "rcpc",
        "dotprod",
        "aes",
        "sha2",
        "sha3",
        "sm4",
        "profile",
        "rng",
        "memtag",
        "sb",
        "ssbs",
        "predres"
      ],
      "description": "Specify the name of the target processor, optionally suffixed by one or more feature modifiers.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-moverride",
      "range": [],
      "description": "Override tuning decisions made by the back-end in response to a -mtune= switch.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mverbose-cost-dump",
      "range": [],
      "description": "Enable verbose cost model dumping in the debug dump files.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mpc-relative-literal-loads",
      "range": [],
      "description": "Enable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-pc-relative-literal-loads",
      "range": [],
      "description": "Disable PC-relative literal loads.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-msign-return-address",
      "range": [
        "none",
        "non-leaf",
        "all"
      ],
      "description": "Select the function scope on which return address signing will be applied.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mbranch-protection",
      "range": [
        "none",
        "standard",
        "pac-ret[+leaf+b-key]",
        "bti"
      ],
      "description": "Select the branch protection features to use.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mharden-sls",
      "range": [
        "retbr",
        "blr",
        "all",
        "none"
      ],
      "description": "Enable compiler hardening against straight line speculation (SLS).",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-msve-vector-bits",
      "range": [
        "scalable",
        "128",
        "256",
        "512",
        "1024",
        "2048"
      ],
      "description": "Specify the number of bits in an SVE vector register.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-moutline-atomics",
      "range": [],
      "description": "Enable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "10.3.0",
      "name": "-mno-outline-atomics",
      "range": [],
      "description": "Disable calls to out-of-line helpers to implement atomic operations.",
      "compileName": "GCC"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mflushz",
      "range": [],
      "description": "This option indicates whether to flush denormalized floating-point values to zero and is different from other unsafe floating-point optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ffp-contract",
      "range": [],
      "description": "The value of this option can be off, on, or fast. BiSheng compiler sets the value to fast by default to enable floating-point multiply-add operations and combine multiplication and addition into one operation, improving operation performance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-faarch64-pow-alt-precision",
      "range": [
        "18",
        "21"
      ],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the pow function so that the computing result of the pow function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-faarch64-minmax-alt-precision",
      "range": [],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the min or max function so that the computing result of the min or max function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-disable-sincos-opt",
      "range": [],
      "description": "This llvm option is used to change the optimization policies of the sin or cos function so that the computing result of the sin or cos function is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-aarch64-recip-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the recip reciprocal instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-aarch64-rsqrt-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the rsqrt reciprocal square root extraction instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-alt-precision-math-functions",
      "range": [],
      "description": "This llvm option is used to replace names of the math functions __mth_i_cosd, __mth_i_asind, and __pd_powi_1 with cosdf, asindf, and powr8i4 to control their precision. (This option must be used together with the KML math library.) This option takes effect only for O1 or higher optimization levels. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-18-math-compatibility",
      "range": [],
      "description": "This llvm option is used to convert math functions such as tgammaf, cbrt, log, and log10 to functions suffixed with _18 to control their precision. (This option must be used together with the KML math library.) This option takes effect only when the optimization level is higher than or equal to O1 and -mllvm -enable-alt-precision-math-functions is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ffp-compatibility",
      "range": [
        "17",
        "18",
        "21"
      ],
      "description": "This general option is used to control all options that need to be enabled to ensure that the calculation result is consistent with that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ffma-combine-fdiv",
      "range": [],
      "description": "This general option is used to optimize the expression a/b+c to fma(a, 1/b, c), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ffma-reverse-associative",
      "range": [],
      "description": "This general option is used to optimize the expression ab+cd to fma(a, b, c*d), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Hx,124,0xc00000",
      "range": [],
      "description": "This Flang option is used to keep the rounding mode of constant initialization consistent with that on the non-ARM computing platform. This option is valid only for Fortran.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-force-customized-pipeline",
      "range": [
        "true",
        "false"
      ],
      "description": "This option forcibly uses the customized pass pipeline. The value true indicates that the optimization is enabled. By default, the optimization is disabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-sad-pattern-recognition",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the absolute value summation operation for differences (sum += abs(a[i] \u2013 b[i])) to generate a more simplified and efficient operation sequence. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-instcombine-ctz-array",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the calculation for De Bruijn sequence table lookup. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-aarch64-loopcond-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option reduces unnecessary instructions for loop condition judgment under some conditions to optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-aarch64-hadd-generation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option uses only one ARM NEON instruction URHADD to complete the vectorized operation (x[i] + y[i] + 1) >> 1 and optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-loop-split",
      "range": [
        "true",
        "false"
      ],
      "description": "This option splits a loop meeting specific conditions into multiple loops to facilitate the reduction of unnecessary loops. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-mem-chk-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the logic of runtime checks generated for LLVM loop vectorization and improves loop vectorization code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-aarch64-ldp-stp-noq",
      "range": [
        "true",
        "false"
      ],
      "description": "This option prohibits the generation of stp/ldp q1, q2, or addr instructions. The performance of these instructions is not ideal. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-func-arg-analysis",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances LLVM range analysis to adapt LLVM function specialization optimization to more functions. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ipsccp-enable-function-specialization",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances the function specialization optimization to adapt the function specialization optimization to functions with function pointers. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-modest-vectorization-unrolling-factors",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies vectorization for loops with a smaller step. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-instcombine-shrink-vector-element",
      "range": [
        "true",
        "false"
      ],
      "description": "This option improves the degree of parallelism (DOP) of vectorized instructions and eliminates the scalar median value generated during vectorization, improving the effect of loop vectorization. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-instcombine-reorder-sum-of-reduce-add",
      "range": [
        "true",
        "false"
      ],
      "description": "This option changes the sequence of reduction operations to improve the reduction code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-replace-fortran-mem-alloc",
      "range": [
        "true",
        "false"
      ],
      "description": "This option allocates stack memory, instead of heap memory, to improve performance when a memory allocation operation of known size (such as arrays) is required in Fortran code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-pg-math-call-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the calling of multiple Fortran math library functions to advance the calling performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-instcombine-gep-common",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the element address calculation for multi-dimensional arrays in complex scenarios (such as nested loops) to reduce the register pressure and improve program performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-sroa-after-unroll",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enables the function of adding SROA after loop unrolling to reduce memory access operations and store variables in the register. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-disable-recursive-bonus",
      "range": [
        "true",
        "false"
      ],
      "description": "This option makes function calling in a recursive function easier to be inlined, improving the performance of frequently called recursive functions. The value true indicates that the inline operation is disabled. The default value is false, indicating that the inline operation is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-disable-recip-sqrt-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the formats of A = (C / sqrt(Y)) and B = A * A in FastMath scenarios to reduce the number of instructions. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-disable-loop-aware-reassociation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option adds loop awareness to Reassociate Pass to limit some operations within the loop, preventing performance deterioration caused by the increase of instructions in the loop. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Hx,70,0x20000000",
      "range": [],
      "description": "In O1, O2, and O3, BiSheng compiler enables minloc and maxloc inlining in the flang1 phase. After inlining, the functions can be called simply using for loops, which facilitates further optimization in LLVM. This option can disable inlining, which is the same as O0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-gep-common",
      "range": [],
      "description": "This option generates a common parent for GEP clusters that originate from the same instruction by removing add instructions (that are used as indexes). \\r\\n-mllvm -gep-common=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -gep-cluster-min=<Int number> indicates the GEP cluster threshold. The default value is 3. \\r\\n-mllvm -gep-loop-mindepth=<Int number> indicates the loop threshold. The default value is 3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-array-restructuring",
      "range": [],
      "description": "This option optimizes the memory access mode of one or more arrays in a program and rearranges arrays to reduce the running time. \\r\\n-mllvm -enable-array-restructuring=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -skip-array-restructuring-codegen=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-struct-peel",
      "range": [],
      "description": "This option optimizes structure peeling and increases the local cache when the structure fields in a structure array are accessed, reducing the running time. \\r\\n-mllvm -enable-struct-peel=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -struct-peel-skip-transform=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false. \\r\\n-mllvm -struct-peel-this=... indicates forcibly peeling a structure defined by the user (subject to legality).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-sort-ivusers-before-lsr",
      "range": [],
      "description": "Loop strength reduction (LSR) optimization is performed only after induction variable users are sorted. This prevents binary assembly inconsistency during multiple compilations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-disable-extra-gate-for-loop-heuristic",
      "range": [
        "true",
        "false"
      ],
      "description": "This option adds conditions to decide whether to enable the branch prediction optimization for a loop. The value true indicates that the optimization is enabled. By default, the optimization is enabled To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-enable-fp-aggressive-interleave",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the A = A + B accumulation operation in a loop and selects the interleave value based on the register pressure to perform loop unrolling for accumulation expressions. Enabling this option causes floating-point precision loss. The value true indicates that the optimization is enabled. By default, the optimization is disabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-E",
      "range": [],
      "description": "Run the preprocessor stage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fsyntax-only",
      "range": [],
      "description": "Run the preprocessor, parser and type checking stages.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-S",
      "range": [],
      "description": "Run the previous stages as well as LLVM generation and optimization stages and target-specific code generation, producing an assembly file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-c",
      "range": [],
      "description": "Run all of the above, plus the assembler, generating a target '.o' object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-x",
      "range": [],
      "description": "Treat subsequent input files as having type language.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-std",
      "range": [
        "c89",
        "c90",
        "iso9899:1990",
        "iso9899:199409",
        "gnu89",
        "gnu90",
        "iso9899:1999",
        "gnu99",
        "c11",
        "iso9899:2011",
        "gnu11",
        "c17",
        "iso9899:2017",
        "gnu17c++98",
        "c++03",
        "gnu++98",
        "gnu++03",
        "c++11",
        "gnu++11",
        "c++14",
        "gnu++14",
        "c++17",
        "gnu++17",
        "c++20",
        "gnu++20",
        "c++2b",
        "gnu++2b",
        "cl1.0",
        "cl1.1",
        "cl1.2",
        "cl2.0",
        "cuda"
      ],
      "description": "Specify the language standard to compile for. The default C language standard is gnu17, except on PS4, where it is gnu99. The default C++ language standard is gnu++17. The default OpenCL language standard is cl1.0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-stdlib",
      "range": [
        "libstdc++",
        "libc++"
      ],
      "description": "Specify the C++ standard library to use; supported options are libstdc++ and libc++. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-rtlib",
      "range": [
        "libgcc",
        "compiler-rt"
      ],
      "description": "Specify the compiler runtime library to use; supported options are libgcc and compiler-rt. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ansi",
      "range": [],
      "description": "Same as -std=c89.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ObjC",
      "range": [],
      "description": "Treat source input files as Objective-C and Object-C++ inputs respectively.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ObjC++",
      "range": [],
      "description": "Treat source input files as Objective-C and Object-C++ inputs respectively.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-trigraphs",
      "range": [],
      "description": "Enable trigraphs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ffreestanding",
      "range": [],
      "description": "Indicate that the file should be compiled for a freestanding, not a hosted, environment. Note that it is assumed that a freestanding environment will additionally provide memcpy, memmove, memset and memcmp implementations, as these are needed for efficient codegen for many programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-builtin",
      "range": [],
      "description": "Disable special handling and optimizations of builtin functions like strlen() and malloc().",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fmath-errno",
      "range": [],
      "description": "Indicate that math functions should be treated as updating errno.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fpascal-strings",
      "range": [],
      "description": "Enable support for Pascal-style strings with '\\pfoo'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fms-extensions",
      "range": [],
      "description": "Enable support for Microsoft extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fmsc-version",
      "range": [],
      "description": "Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fborland-extensions",
      "range": [],
      "description": "Enable support for Borland extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fwritable-strings",
      "range": [],
      "description": "Make all string literals default to writable. This disables uniquing of strings and other optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-flax-vector-conversions",
      "range": [
        "none",
        "integer",
        "all"
      ],
      "description": "Allow loose type checking rules for implicit vector conversions. Defaults to integer if unspecified.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-lax-vector-conversions",
      "range": [],
      "description": "Allow loose type checking rules for implicit vector conversions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fblocks",
      "range": [],
      "description": "Enable the 'Blocks' language feature.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fobjc-abi-version",
      "range": [
        "1",
        "2",
        "3"
      ],
      "description": "Select the Objective-C ABI version to use. Available versions are 1 (legacy 'fragile' ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fobjc-nonfragile-abi-version",
      "range": [],
      "description": "Select the Objective-C non-fragile ABI version to use by default. This will only be used as the Objective-C ABI when the non-fragile ABI is enabled (either via -fobjc-nonfragile-abi, or because it is the platform default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fobjc-nonfragile-abi",
      "range": [],
      "description": "Enable use of the Objective-C non-fragile ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-objc-nonfragile-abi",
      "range": [],
      "description": "Enable use of the Objective-C non-fragile ABI. On platforms for which this is the default ABI, it can be disabled with -fno-objc-nonfragile-abi.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-arch",
      "range": [],
      "description": "Specify the architecture to build for.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-mmacosx-version-min",
      "range": [],
      "description": "When building for macOS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-miphoneos-version-min",
      "range": [],
      "description": "When building for iPhone OS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-mcpu",
      "range": [],
      "description": "Acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-mtune",
      "range": [],
      "description": "Acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-march",
      "range": [],
      "description": "Specify that Clang should generate code for a specific processor family member and later. For example, if you specify -march=i486, the compiler is allowed to generate instructions that are valid on i486 and later processors, but which may not exist on earlier ones.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-g",
      "range": [],
      "description": "Generate debug information.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-gline-tables-only",
      "range": [],
      "description": "Generate only line table debug information. This allows for symbolicated backtraces with inlining information, but does not include any information about variables, their locations or types.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-gmodules",
      "range": [],
      "description": "Generate debug information that contains external references to types defined in Clang modules or precompiled headers instead of emitting redundant debug type information into every object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fstandalone-debug",
      "range": [],
      "description": "Clang supports a number of optimizations to reduce the size of debug information in the binary.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-standalone-debug",
      "range": [],
      "description": "On Darwin -fstandalone-debug is enabled by default.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-feliminate-unused-debug-types",
      "range": [],
      "description": "By default, Clang does not emit type information for types that are defined but not used in a program. To retain the debug info for these unused types, the negation -fno-eliminate-unused-debug-types can be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fexceptions",
      "range": [],
      "description": "Enable generation of unwind information. This allows exceptions to be thrown through Clang compiled stack frames. This is on by default in x86-64.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ftrapv",
      "range": [],
      "description": "Generate code to catch integer overflow errors. Signed integer overflow is undefined in C. With this flag, extra code is generated to detect this and abort when it happens.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fvisibility",
      "range": [],
      "description": "This flag sets the default visibility level.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fcommon",
      "range": [],
      "description": "This flag specifies that variables without initializers get common linkage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-common",
      "range": [],
      "description": "This flag specifies that variables without initializers get common linkage. It can be disabled with -fno-common.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ftls-model",
      "range": [
        "global-dynamic",
        "local-dynamic",
        "initial-exec",
        "local-exec"
      ],
      "description": "Set the default thread-local storage (TLS) model to use for thread-local variables. Valid values are: 'global-dynamic', 'local-dynamic', 'initial-exec' and 'local-exec'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-flto",
      "range": [
        "full",
        "thin"
      ],
      "description": "Generate output files in LLVM formats, suitable for link time optimization. When used with -S this generates LLVM intermediate language assembly files, otherwise this generates LLVM bitcode format object files (which may be passed to the linker depending on the stage selection options).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-emit-llvm",
      "range": [],
      "description": "Generate output files in LLVM formats, suitable for link time optimization.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Qunused-arguments",
      "range": [],
      "description": "Do not emit any warnings for unused driver arguments.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Wa",
      "range": [],
      "description": "Pass the comma separated arguments in args to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Wl",
      "range": [],
      "description": "Pass the comma separated arguments in args to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Wp",
      "range": [],
      "description": "Pass the comma separated arguments in args to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Xanalyzer",
      "range": [],
      "description": "Pass arg to the static analyzer.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Xassembler",
      "range": [],
      "description": "Pass arg to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Xlinker",
      "range": [],
      "description": "Pass arg to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Xpreprocessor",
      "range": [],
      "description": "Pass arg to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-o",
      "range": [],
      "description": "Write output to file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-print-file-name",
      "range": [],
      "description": "Print the full library path of file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-print-libgcc-file-name",
      "range": [],
      "description": "Print the library path for the currently used compiler runtime library ('libgcc.a' or 'libclang_rt.builtins.*.a').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-print-prog-name",
      "range": [],
      "description": "Print the full program path of name.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-print-search-dirs",
      "range": [],
      "description": "Print the paths used for finding libraries and programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-save-temps",
      "range": [],
      "description": "Save intermediate compilation results.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-save-stats",
      "range": [
        "cwd",
        "obj"
      ],
      "description": "Save internal code generation (LLVM) statistics to a file in the current directory (-save-stats/'-save-stats=cwd') or the directory of the output file ('-save-state=obj').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-integrated-as",
      "range": [],
      "description": "Enable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-no-integrated-as",
      "range": [],
      "description": "Disable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-time",
      "range": [],
      "description": "Time individual commands.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-ftime-report",
      "range": [],
      "description": "Print timing summary of each stage of compilation.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-v",
      "range": [],
      "description": "Show commands to run and use verbose output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fshow-column",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fshow-source-location",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fcaret-diagnostics",
      "range": [],
      "description": "The line of source code that the issue occurs on, along with a caret and ranges that indicate the important locations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdiagnostics-fixit-info",
      "range": [],
      "description": "'FixIt' information, which is a concise explanation of how to fix the problem (when Clang is certain it knows).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdiagnostics-parseable-fixits",
      "range": [],
      "description": "Print fix-its in machine parseable form",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdiagnostics-print-source-range-info",
      "range": [],
      "description": "A machine-parsable representation of the ranges involved (off by default)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdiagnostics-show-option",
      "range": [],
      "description": "An option that indicates how to control the diagnostic (for diagnostics that support it)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-nostdinc",
      "range": [],
      "description": "Do not search the standard system directories or compiler builtin directories for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-nostdlibinc",
      "range": [],
      "description": "Do not search the standard system directories for include files, but do search compiler builtin include directories.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-nobuiltininc",
      "range": [],
      "description": "Do not search clang's builtin directory for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O0",
      "range": [],
      "description": "Means 'no optimization': this level compiles the fastest and generates the most debuggable code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O1",
      "range": [],
      "description": "Somewhere between -O0 and -O2.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O2",
      "range": [],
      "description": "Moderate level of optimization which enables most optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O3",
      "range": [],
      "description": "Like -O2, except that it enables optimizations that take longer to perform or that may generate larger code (in an attempt to make the program run faster).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Ofast",
      "range": [],
      "description": "Enables all the optimizations from -O3 along with other aggressive optimizations that may violate strict compliance with language standards.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Os",
      "range": [],
      "description": "Like -O2 with extra optimizations to reduce code size.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Oz",
      "range": [],
      "description": "Like -Os (and thus -O2), but reduces code size further.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Og",
      "range": [],
      "description": "Like -O1. In future versions, this option might disable different optimizations in order to improve debuggability.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O",
      "range": [],
      "description": "Equivalent to -O1.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-O4",
      "range": [],
      "description": "-O4 and higher levels are currently equivalent to -O3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-noFlangLibs",
      "range": [],
      "description": "Do not link against Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-mp",
      "range": [],
      "description": "Enable OpenMP and link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-nomp",
      "range": [],
      "description": "Do not link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mbackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like any other character.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mnobackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like a C-style escape character (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mbyteswapio",
      "range": [],
      "description": "Swap byte-order for unformatted input/output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mfixed",
      "range": [],
      "description": "Assume fixed-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mextend",
      "range": [],
      "description": "Allow source lines up to 132 characters.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mfreeform",
      "range": [],
      "description": "Assume free-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mpreprocess",
      "range": [],
      "description": "Run preprocessor for Fortran files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mrecursive",
      "range": [],
      "description": "Generate code to allow recursive subprograms.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mstandard",
      "range": [],
      "description": "Check standard conformance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Msave",
      "range": [],
      "description": "Assume all variables have SAVE attribute.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-module",
      "range": [],
      "description": "path to module file (-I also works).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mallocatable",
      "range": [],
      "description": "Select Fortran 03 semantics for assignments to allocatable objects (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-static-flang-libs",
      "range": [],
      "description": "Link using static Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-M[no]daz",
      "range": [],
      "description": "Treat denormalized numbers as zero.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-M[no]flushz",
      "range": [],
      "description": "Set SSE to flush-to-zero mode.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-Mcache_align",
      "range": [],
      "description": "Align large objects on cache-line boundaries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-M[no]fprelaxed",
      "range": [],
      "description": "This option is ignored.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdefault-integer-8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fdefault-real-8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-i8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-r8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.1.0",
      "name": "-fno-fortran-main",
      "range": [],
      "description": "Don't link in Fortran main.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mflushz",
      "range": [],
      "description": "This option indicates whether to flush denormalized floating-point values to zero and is different from other unsecure floating-point optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ffp-contract",
      "range": [],
      "description": "The value of this option can be off, on, or fast. BiSheng compiler sets the value to fast by default to enable floating-point multiply-add operations and combine multiplication and addition into one operation, improving operation performance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-faarch64-pow-alt-precision",
      "range": [
        "18",
        "21"
      ],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the pow function so that the computing result of the pow function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-faarch64-minmax-alt-precision",
      "range": [],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the min or max function so that the computing result of the min or max function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-disable-sincos-opt",
      "range": [],
      "description": "This llvm option is used to change the optimization policies of the sin or cos function so that the computing result of the sin or cos function is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-aarch64-recip-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the recip reciprocal instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-aarch64-rsqrt-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the rsqrt reciprocal square root extraction instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-alt-precision-math-functions",
      "range": [],
      "description": "This llvm option is used to replace names of the math functions __mth_i_cosd, __mth_i_asind, and __pd_powi_1 with cosdf, asindf, and powr8i4 to control their precision. (This option must be used together with the KML math library.) This option takes effect only for O1 or higher optimization levels. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-18-math-compatibility",
      "range": [],
      "description": "This llvm option is used to convert math functions such as tgammaf, cbrt, log, and log10 to functions suffixed with _18 to control their precision. (This option must be used together with the KML math library.) This option takes effect only when the optimization level is higher than or equal to O1 and -mllvm -enable-alt-precision-math-functions is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ffp-compatibility",
      "range": [
        "17",
        "18",
        "21"
      ],
      "description": "This general option is used to control all options that need to be enabled to ensure that the calculation result is consistent with that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ffma-combine-fdiv",
      "range": [],
      "description": "This general option is used to optimize the expression a/b+c to fma(a, 1/b, c), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ffma-reverse-associative",
      "range": [],
      "description": "This general option is used to optimize the expression a*b+c*d to fma(a, b, c*d), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Hx,124,0xc00000",
      "range": [],
      "description": "This Flang option is used to keep the rounding mode of constant initialization consistent with that on the non-ARM computing platform. This option is valid only for Fortran.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-force-customized-pipeline",
      "range": [
        "true",
        "false"
      ],
      "description": "This option forcibly uses the customized pass pipeline. The value true indicates that the optimization is enabled. By default, the optimization is disabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-sad-pattern-recognition",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the absolute value summation operation for differences (sum += abs(a[i] - b[i])) to generate a more simplified and efficient operation sequence. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-instcombine-ctz-array",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the calculation for De Bruijn sequence table lookup. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-aarch64-loopcond-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option reduces unnecessary instructions for loop condition judgment under some conditions to optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-aarch64-hadd-generation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option uses only one ARM NEON instruction URHADD to complete the vectorized operation (x[i] + y[i] + 1) >> 1 and optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-loop-split",
      "range": [
        "true",
        "false"
      ],
      "description": "This option splits a loop meeting specific conditions into multiple loops to facilitate the reduction of unnecessary loops. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-mem-chk-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the logic of runtime checks generated for LLVM loop vectorization and improves loop vectorization code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-aarch64-ldp-stp-noq",
      "range": [
        "true",
        "false"
      ],
      "description": "This option prohibits the generation of stp/ldp q1, q2, or addr instructions. The performance of these instructions is not ideal. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-func-arg-analysis",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances LLVM range analysis to adapt LLVM function specialization optimization to more functions. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ipsccp-enable-function-specialization",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances the function specialization optimization to adapt the function specialization optimization to functions with function pointers. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-modest-vectorization-unrolling-factors",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies vectorization for loops with a smaller step. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-instcombine-shrink-vector-element",
      "range": [
        "true",
        "false"
      ],
      "description": "This option improves the degree of parallelism (DOP) of vectorized instructions and eliminates the scalar median value generated during vectorization, improving the effect of loop vectorization. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-instcombine-reorder-sum-of-reduce-add",
      "range": [
        "true",
        "false"
      ],
      "description": "This option changes the sequence of reduction operations to improve the reduction code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-replace-fortran-mem-alloc",
      "range": [
        "true",
        "false"
      ],
      "description": "This option allocates stack memory, instead of heap memory, to improve performance when a memory allocation operation of known size (such as arrays) is required in Fortran code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-pg-math-call-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the calling of multiple Fortran math library functions to advance the calling performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-instcombine-gep-common",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the element address calculation for multi-dimensional arrays in complex scenarios (such as nested loops) to reduce the register pressure and improve program performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-sroa-after-unroll",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enables the function of adding SROA after loop unrolling to reduce memory access operations and store variables in the register. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-disable-recursive-bonus",
      "range": [
        "true",
        "false"
      ],
      "description": "This option makes function calling in a recursive function easier to be inlined, improving the performance of frequently called recursive functions. The value true indicates that the inline operation is disabled. The default value is false, indicating that the inline operation is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-disable-recip-sqrt-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the formats of A = (C / sqrt(Y)) and B = A * A in FastMath scenarios to reduce the number of instructions. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-disable-loop-aware-reassociation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option adds loop awareness to Reassociate Pass to limit some operations within the loop, preventing performance deterioration caused by the increase of instructions in the loop. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-enable-gzipcrc32",
      "range": [
        "true",
        "false"
      ],
      "description": "This option identifies the CRC32 calculation logic in the code and uses the built-in instructions of the processor to accelerate the calculation. If this option is set to true, the optimization is enabled. If this option is set to false, the optimization is disabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Hx,70,0x20000000",
      "range": [],
      "description": "In O1, O2, and O3, BiSheng compiler enables minloc and maxloc inlining in the flang1 phase. After inlining, the functions can be called simply using for loops, which facilitates further optimization in LLVM. This option can disable inlining, which is the same as O0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-update-iv-scev",
      "range": [],
      "description": "This option updates the SCEV analysis result in induction variable users pass to display more optimization opportunities. This option is enabled by default, which may increase the compilation duration. If you have high requirements on the compilation duration, you can set -mllvm -update-iv-scev to false. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-gep-common",
      "range": [],
      "description": "This option generates a common parent for GEP clusters that originate from the same instruction by removing add instructions (that are used as indexes). \\r\\n-mllvm -gep-common=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -gep-cluster-min=<Int number> indicates the GEP cluster threshold. The default value is 3. \\r\\n-mllvm -gep-loop-mindepth=<Int number> indicates the loop threshold. The default value is 3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-array-restructuring",
      "range": [],
      "description": "This option optimizes the memory access mode of one or more arrays in a program and rearranges arrays to reduce the running time. \\r\\n-mllvm -enable-array-restructuring=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -skip-array-restructuring-codegen=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-struct-peel",
      "range": [],
      "description": "This option optimizes structure peeling and increases the local cache when the structure fields in a structure array are accessed, reducing the running time. \\r\\n-mllvm -enable-struct-peel=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -struct-peel-skip-transform=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false. \\r\\n-mllvm -struct-peel-this=... indicates forcibly peeling a structure defined by the user (subject to legality).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-sort-ivusers-before-lsr",
      "range": [],
      "description": "Loop strength reduction (LSR) optimization is performed only after induction variable users are sorted. This prevents binary assembly inconsistency during multiple compilations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mx,218,0x1",
      "range": [],
      "description": "BiSheng compiler enables inlining string comparison in the flang2 phase. After inlining, a function call becomes a simple for-loop character comparison, which can be further optimized in the LLVM. The inlining function is disabled by default. You can use this option to enable the inlining function.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-foverflow-shift-alt-behavior",
      "range": [],
      "description": "For undefined shift behavior that exceeds the bit width of the integer data type, for example, (int) a << 40, BiSheng compiler optimizes the expression to an integer constant in advance to prevent the expression from being identified and optimized as different values in different optimizations. This option is disabled by default.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-E",
      "range": [],
      "description": "Run the preprocessor stage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fsyntax-only",
      "range": [],
      "description": "Run the preprocessor, parser and type checking stages.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-S",
      "range": [],
      "description": "Run the previous stages as well as LLVM generation and optimization stages and target-specific code generation, producing an assembly file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-c",
      "range": [],
      "description": "Run all of the above, plus the assembler, generating a target '.o' object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-x",
      "range": [],
      "description": "Treat subsequent input files as having type language.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-std",
      "range": [
        "c89",
        "c90",
        "iso9899:1990",
        "iso9899:199409",
        "gnu89",
        "gnu90",
        "iso9899:1999",
        "gnu99",
        "c11",
        "iso9899:2011",
        "gnu11",
        "c17",
        "iso9899:2017",
        "gnu17",
        "c++98",
        "c++03",
        "gnu++98",
        "gnu++03",
        "c++11",
        "gnu++11",
        "c++14",
        "gnu++14",
        "c++17",
        "gnu++17",
        "c++20",
        "gnu++20",
        "c++2b",
        "gnu++2b",
        "cl1.0",
        "cl1.1",
        "cl1.2",
        "cl2.0",
        "cuda"
      ],
      "description": "Specify the language standard to compile for. The default C language standard is gnu17, except on PS4, where it is gnu99. The default C++ language standard is gnu++17. The default OpenCL language standard is cl1.0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-stdlib",
      "range": [
        "libstdc++",
        "libc++"
      ],
      "description": "Specify the C++ standard library to use; supported options are libstdc++ and libc++. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-rtlib",
      "range": [
        "libgcc",
        "compiler-rt"
      ],
      "description": "Specify the compiler runtime library to use; supported options are libgcc and compiler-rt. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ansi",
      "range": [],
      "description": "Same as -std=c89.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ObjC",
      "range": [],
      "description": "Treat source input files as Objective-C and Objective-C++ inputs respectively.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ObjC++",
      "range": [],
      "description": "Treat source input files as Objective-C and Objective-C++ inputs respectively.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-trigraphs",
      "range": [],
      "description": "Enable trigraphs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ffreestanding",
      "range": [],
      "description": "Indicate that the file should be compiled for a freestanding, not a hosted, environment. Note that it is assumed that a freestanding environment will additionally provide memcpy, memmove, memset and memcmp implementations, as these are needed for efficient codegen for many programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-builtin",
      "range": [],
      "description": "Disable special handling and optimizations of builtin functions like strlen() and malloc().",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fmath-errno",
      "range": [],
      "description": "Indicate that math functions should be treated as updating errno.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fpascal-strings",
      "range": [],
      "description": "Enable support for Pascal-style strings with '\\pfoo'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fms-extensions",
      "range": [],
      "description": "Enable support for Microsoft extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fmsc-version",
      "range": [],
      "description": "Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fborland-extensions",
      "range": [],
      "description": "Enable support for Borland extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fwritable-strings",
      "range": [],
      "description": "Make all string literals default to writable. This disables uniquing of strings and other optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-flax-vector-conversions",
      "range": [
        "none",
        "integer",
        "all"
      ],
      "description": "Allow loose type checking rules for implicit vector conversions. Defaults to integer if unspecified.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-lax-vector-conversions",
      "range": [],
      "description": "Disallow loose type checking rules for implicit vector conversions. Equivalent to -flax-vector-conversions=none.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fblocks",
      "range": [],
      "description": "Enable the 'Blocks' language feature.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fobjc-abi-version",
      "range": [
        "1",
        "2",
        "3"
      ],
      "description": "Select the Objective-C ABI version to use. Available versions are 1 (legacy 'fragile' ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fobjc-nonfragile-abi-version",
      "range": [],
      "description": "Select the Objective-C non-fragile ABI version to use by default. This will only be used as the Objective-C ABI when the non-fragile ABI is enabled (either via -fobjc-nonfragile-abi, or because it is the platform default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fobjc-nonfragile-abi",
      "range": [],
      "description": "Enable use of the Objective-C non-fragile ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-objc-nonfragile-abi",
      "range": [],
      "description": "Disable use of the Objective-C non-fragile ABI on platforms for which it is the default ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-arch",
      "range": [],
      "description": "Specify the architecture to build for.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-mmacosx-version-min",
      "range": [],
      "description": "When building for macOS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-miphoneos-version-min",
      "range": [],
      "description": "When building for iPhone OS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-mcpu",
      "range": [],
      "description": "When specified as -mcpu=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-mtune",
      "range": [],
      "description": "When specified as -mtune=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-march",
      "range": [],
      "description": "Specify that Clang should generate code for a specific processor family member and later. For example, if you specify -march=i486, the compiler is allowed to generate instructions that are valid on i486 and later processors, but which may not exist on earlier ones.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-g",
      "range": [],
      "description": "Generate debug information.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-gline-tables-only",
      "range": [],
      "description": "Generate only line table debug information. This allows for symbolicated backtraces with inlining information, but does not include any information about variables, their locations or types.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-gmodules",
      "range": [],
      "description": "Generate debug information that contains external references to types defined in Clang modules or precompiled headers instead of emitting redundant debug type information into every object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fstandalone-debug",
      "range": [],
      "description": "Clang supports a number of optimizations to reduce the size of debug information in the binary.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-standalone-debug",
      "range": [],
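      "description": "Disable -fstandalone-debug so that Clang may apply optimizations that reduce the size of debug information in the binary. On Darwin -fstandalone-debug is enabled by default.",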
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-feliminate-unused-debug-types",
      "range": [],
      "description": "By default, Clang does not emit type information for types that are defined but not used in a program. To retain the debug info for these unused types, the negation -fno-eliminate-unused-debug-types can be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fexceptions",
      "range": [],
      "description": "Enable generation of unwind information. This allows exceptions to be thrown through Clang compiled stack frames. This is on by default in x86-64.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ftrapv",
      "range": [],
      "description": "Generate code to catch integer overflow errors. Signed integer overflow is undefined in C. With this flag, extra code is generated to detect this and abort when it happens.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fvisibility",
      "range": [],
      "description": "This flag sets the default visibility level.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fcommon",
      "range": [],
      "description": "This flag specifies that variables without initializers get common linkage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-common",
      "range": [],
      "description": "This flag specifies that variables without initializers do not get common linkage. It is the negation of -fcommon.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ftls-model",
      "range": [
        "global-dynamic",
        "local-dynamic",
        "initial-exec",
        "local-exec"
      ],
      "description": "Set the default thread-local storage (TLS) model to use for thread-local variables. Valid values are: 'global-dynamic', 'local-dynamic', 'initial-exec' and 'local-exec'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-flto",
      "range": [
        "full",
        "thin"
      ],
      "description": "Generate output files in LLVM formats, suitable for link time optimization. When used with -S this generates LLVM intermediate language assembly files, otherwise this generates LLVM bitcode format object files (which may be passed to the linker depending on the stage selection options).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-emit-llvm",
      "range": [],
      "description": "Generate output files in LLVM formats, suitable for link time optimization.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Qunused-arguments",
      "range": [],
      "description": "Do not emit any warnings for unused driver arguments.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Wa",
      "range": [],
      "description": "Pass the comma separated arguments in args to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Wl",
      "range": [],
      "description": "Pass the comma separated arguments in args to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Wp",
      "range": [],
      "description": "Pass the comma separated arguments in args to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Xanalyzer",
      "range": [],
      "description": "Pass arg to the static analyzer.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Xassembler",
      "range": [],
      "description": "Pass arg to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Xlinker",
      "range": [],
      "description": "Pass arg to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Xpreprocessor",
      "range": [],
      "description": "Pass arg to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-o",
      "range": [],
      "description": "Write output to file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-print-file-name",
      "range": [],
      "description": "Print the full library path of file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-print-libgcc-file-name",
      "range": [],
      "description": "Print the library path for the currently used compiler runtime library ('libgcc.a' or 'libclang_rt.builtins.*.a').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-print-prog-name",
      "range": [],
      "description": "Print the full program path of name.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-print-search-dirs",
      "range": [],
      "description": "Print the paths used for finding libraries and programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-save-temps",
      "range": [],
      "description": "Save intermediate compilation results.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-save-stats",
      "range": [
        "cwd",
        "obj"
      ],
      "description": "Save internal code generation (LLVM) statistics to a file in the current directory ('-save-stats'/'-save-stats=cwd') or the directory of the output file ('-save-stats=obj').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-integrated-as",
      "range": [],
      "description": "Enable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-no-integrated-as",
      "range": [],
      "description": "Disable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-time",
      "range": [],
      "description": "Time individual commands.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-ftime-report",
      "range": [],
      "description": "Print timing summary of each stage of compilation.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-v",
      "range": [],
      "description": "Show commands to run and use verbose output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fshow-column",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fshow-source-location",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fcaret-diagnostics",
      "range": [],
      "description": "The line of source code that the issue occurs on, along with a caret and ranges that indicate the important locations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdiagnostics-fixit-info",
      "range": [],
      "description": "'FixIt' information, which is a concise explanation of how to fix the problem (when Clang is certain it knows).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdiagnostics-parseable-fixits",
      "range": [],
      "description": "Print fix-its in machine parseable form",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdiagnostics-print-source-range-info",
      "range": [],
      "description": "A machine-parsable representation of the ranges involved (off by default)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdiagnostics-show-option",
      "range": [],
      "description": "An option that indicates how to control the diagnostic (for diagnostics that support it)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-nostdinc",
      "range": [],
      "description": "Do not search the standard system directories or compiler builtin directories for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-nostdlibinc",
      "range": [],
      "description": "Do not search the standard system directories for include files, but do search compiler builtin include directories.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-nobuiltininc",
      "range": [],
      "description": "Do not search clang's builtin directory for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O0",
      "range": [],
      "description": "Means 'no optimization': this level compiles the fastest and generates the most debuggable code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O1",
      "range": [],
      "description": "Somewhere between -O0 and -O2.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O2",
      "range": [],
      "description": "Moderate level of optimization which enables most optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O3",
      "range": [],
      "description": "Like -O2, except that it enables optimizations that take longer to perform or that may generate larger code (in an attempt to make the program run faster).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Ofast",
      "range": [],
      "description": "Enables all the optimizations from -O3 along with other aggressive optimizations that may violate strict compliance with language standards.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Os",
      "range": [],
      "description": "Like -O2 with extra optimizations to reduce code size.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Oz",
      "range": [],
      "description": "Like -Os (and thus -O2), but reduces code size further.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Og",
      "range": [],
      "description": "Like -O1. In future versions, this option might disable different optimizations in order to improve debuggability.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O",
      "range": [],
      "description": "Equivalent to -O1.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-O4",
      "range": [],
      "description": "-O4 and higher are currently equivalent to -O3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-noFlangLibs",
      "range": [],
      "description": "Do not link against Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-mp",
      "range": [],
      "description": "Enable OpenMP and link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-nomp",
      "range": [],
      "description": "Do not link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mbackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like any other character.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mnobackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like a C-style escape character (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mbyteswapio",
      "range": [],
      "description": "Swap byte-order for unformatted input/output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mfixed",
      "range": [],
      "description": "Assume fixed-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mextend",
      "range": [],
      "description": "Allow source lines up to 132 characters.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mfreeform",
      "range": [],
      "description": "Assume free-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mpreprocess",
      "range": [],
      "description": "Run preprocessor for Fortran files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mrecursive",
      "range": [],
      "description": "Generate code to allow recursive subprograms.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mstandard",
      "range": [],
      "description": "Check standard conformance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Msave",
      "range": [],
      "description": "Assume all variables have SAVE attribute.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-module",
      "range": [],
      "description": "Path to module file (-I also works).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mallocatable",
      "range": [],
      "description": "Select Fortran 03 semantics for assignments to allocatable objects (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-static-flang-libs",
      "range": [],
      "description": "Link using static Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-M[no]daz",
      "range": [],
      "description": "Treat denormalized numbers as zero.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-M[no]flushz",
      "range": [],
      "description": "Set SSE to flush-to-zero mode.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-Mcache_align",
      "range": [],
      "description": "Align large objects on cache-line boundaries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-M[no]fprelaxed",
      "range": [],
      "description": "This option is ignored.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdefault-integer-8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fdefault-real-8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-i8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-r8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.3.0",
      "name": "-fno-fortran-main",
      "range": [],
      "description": "Don't link in Fortran main.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mflushz",
      "range": [],
      "description": "This option indicates whether to flush denormalized floating-point values to zero and is different from other unsecure floating-point optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ffp-contract",
      "range": [],
      "description": "The value of this option can be off, on, or fast. BiSheng compiler sets the value to fast by default to enable floating-point multiply-add operations and combine multiplication and addition into one operation, improving operation performance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-faarch64-pow-alt-precision",
      "range": [
        "18",
        "21"
      ],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the pow function so that the computing result of the pow function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-faarch64-minmax-alt-precision",
      "range": [],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the min or max function so that the computing result of the min or max function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-disable-sincos-opt",
      "range": [],
      "description": "This llvm option is used to change the optimization policies of the sin or cos function so that the computing result of the sin or cos function is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-aarch64-recip-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the recip reciprocal instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-aarch64-rsqrt-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the rsqrt reciprocal square root extraction instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-alt-precision-math-functions",
      "range": [],
      "description": "This llvm option is used to replace names of the math functions __mth_i_cosd, __mth_i_asind, and __pd_powi_1 with cosdf, asindf, and powr8i4 to control their precision. (This option must be used together with the KML math library.) This option takes effect only for O1 or higher optimization levels. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-18-math-compatibility",
      "range": [],
      "description": "This llvm option is used to convert math functions such as tgammaf, cbrt, log, and log10 to functions suffixed with _18 to control their precision. (This option must be used together with the KML math library.) This option takes effect only when the optimization level is higher than or equal to O1 and -mllvm -enable-alt-precision-math-functions is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ffp-compatibility",
      "range": [
        "17",
        "18",
        "21"
      ],
      "description": "This general option is used to control all options that need to be enabled to ensure that the calculation result is consistent with that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ffma-combine-fdiv",
      "range": [],
      "description": "This general option is used to optimize the expression a/b+c to fma(a, 1/b, c), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ffma-reverse-associative",
      "range": [],
      "description": "This general option is used to optimize the expression a*b+c*d to fma(a, b, c*d), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Hx,124,0xc00000",
      "range": [],
      "description": "This Flang option is used to keep the rounding mode of constant initialization consistent with that on the non-ARM computing platform. This option is valid only for Fortran.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-force-customized-pipeline",
      "range": [
        "true",
        "false"
      ],
      "description": "This option forcibly uses the customized pass pipeline. The value true indicates that the optimization is enabled. By default, the optimization is disabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-sad-pattern-recognition",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the absolute value summation operation for differences (sum += abs(a[i] - b[i])) to generate a more simplified and efficient operation sequence. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-instcombine-ctz-array",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the calculation for De Bruijn sequence table lookup. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-aarch64-loopcond-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option reduces unnecessary instructions for loop condition judgment under some conditions to optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-aarch64-hadd-generation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option uses only one ARM NEON instruction URHADD to complete the vectorized operation (x[i] + y[i] + 1) >> 1 and optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-loop-split",
      "range": [
        "true",
        "false"
      ],
      "description": "This option splits a loop meeting specific conditions into multiple loops to facilitate the reduction of unnecessary loops. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-mem-chk-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the logic of runtime checks generated for LLVM loop vectorization and improves loop vectorization code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-aarch64-ldp-stp-noq",
      "range": [
        "true",
        "false"
      ],
      "description": "This option prohibits the generation of instructions of the form stp/ldp q1, q2, addr. The performance of these instructions is not ideal. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-func-arg-analysis",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances LLVM range analysis to adapt LLVM function specialization optimization to more functions. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ipsccp-enable-function-specialization",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances the function specialization optimization to adapt the function specialization optimization to functions with function pointers. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-modest-vectorization-unrolling-factors",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies vectorization for loops with a smaller step. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-instcombine-shrink-vector-element",
      "range": [
        "true",
        "false"
      ],
      "description": "This option improves the degree of parallelism (DOP) of vectorized instructions and eliminates the scalar median value generated during vectorization, improving the effect of loop vectorization. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-instcombine-reorder-sum-of-reduce-add",
      "range": [
        "true",
        "false"
      ],
      "description": "This option changes the sequence of reduction operations to improve the reduction code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-replace-fortran-mem-alloc",
      "range": [
        "true",
        "false"
      ],
      "description": "This option allocates stack memory, instead of heap memory, to improve performance when a memory allocation operation of known size (such as arrays) is required in Fortran code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-pg-math-call-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies calls to multiple Fortran math library functions to improve call performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-instcombine-gep-common",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the element address calculation for multi-dimensional arrays in complex scenarios (such as nested loops) to reduce the register pressure and improve program performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-sroa-after-unroll",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enables the function of adding SROA after loop unrolling to reduce memory access operations and store variables in the register. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-disable-recursive-bonus",
      "range": [
        "true",
        "false"
      ],
      "description": "This option makes function calling in a recursive function easier to be inlined, improving the performance of frequently called recursive functions. The value true indicates that the inline operation is disabled. The default value is false, indicating that the inline operation is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-disable-recip-sqrt-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the formats of A = (C / sqrt(Y)) and B = A * A in FastMath scenarios to reduce the number of instructions. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-disable-loop-aware-reassociation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option adds loop awareness to Reassociate Pass to limit some operations within the loop, preventing performance deterioration caused by the increase of instructions in the loop. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-gzipcrc32",
      "range": [
        "true",
        "false"
      ],
      "description": "This option identifies the CRC32 calculation logic in the code and uses the built-in instructions of the processor to accelerate the calculation. If this option is set to true, the optimization is enabled. If this option is set to false, the optimization is disabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Hx,70,0x20000000",
      "range": [],
      "description": "In O1, O2, and O3, BiSheng compiler enables minloc and maxloc inlining in the flang1 phase. After inlining, the functions can be called simply using for loops, which facilitates further optimization in LLVM. This option can disable inlining, which is the same as O0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-update-iv-scev",
      "range": [],
      "description": "This option updates the SCEV analysis result in induction variable users pass to display more optimization opportunities. This option is enabled by default, which may increase the compilation duration. If you have high requirements on the compilation duration, you can set -mllvm -update-iv-scev to false. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-gep-common",
      "range": [],
      "description": "This option generates a common parent for GEP clusters that originate from the same instruction by removing add instructions (that are used as indexes). \\r\\n-mllvm -gep-common=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -gep-cluster-min=<Int number> indicates the GEP cluster threshold. The default value is 3. \\r\\n-mllvm -gep-loop-mindepth=<Int number> indicates the loop threshold. The default value is 3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-array-restructuring",
      "range": [],
      "description": "This option optimizes the memory access mode of one or more arrays in a program and rearranges arrays to reduce the running time. \\r\\n-mllvm -enable-array-restructuring=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -skip-array-restructuring-codegen=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-struct-peel",
      "range": [],
      "description": "This option optimizes structure peeling and increases the local cache when the structure fields in a structure array are accessed, reducing the running time. \\r\\n-mllvm -enable-struct-peel=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -struct-peel-skip-transform=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false. \\r\\n-mllvm -struct-peel-this=... indicates forcibly peeling a structure defined by the user (subject to legality). \\r\\n-mllvm -struct-peel-memory-id=<true/false> supports peeling multiple arrays of the same type. The default value is true.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-instcombine-simplify-mul64",
      "range": [
        "true",
        "false"
      ],
      "description": "MUL64 instruction peephole optimization takes effect only at the AArch64 backend and C and C++ frontends. This option is enabled by default. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-sroa-inferred-inlining",
      "range": [
        "true",
        "false"
      ],
      "description": "This option performs inline enhancement and optimization based on SROA information. This takes effect only at the AArch64 backend and C and C++ frontends. This option is disabled by default. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-enable-combine-sqrt-exp",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the sqrt(exp(x)) operation to exp(x*0.5), which takes effect only at the AArch64 backend and C and C++ frontends. This option is enabled by default and takes effect when the -ffast-math option is enabled and the optimization level is greater than or equal to O1. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-replace-sqrt-compare-by-square",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the specific scenarios where the sqrt is used as the redirection judgment logic. This takes effect only when it is enabled together with the -ffast-math option at the AArch64 backend and C and C++ frontends. This option is disabled by default. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fopenmp-reduction-duplicate",
      "range": [],
      "description": "This option enhances the vectorization capability in the OpenMP reduction scenario. This takes effect at the AArch64 backend and C and C++ frontends only when -fopenmp is enabled. This option is enabled by default. You can run -fno-openmp-reduction-duplicate to disable it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fopenmp-firstprivatize-locals",
      "range": [],
      "description": "This option enhances the vectorization capability in the OpenMP firstprivatize scenario. This takes effect at the AArch64 backend and C and C++ frontends only when -fopenmp is enabled. This option is enabled by default. You can run -fno-openmp-firstprivatize-locals to disable it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-sort-ivusers-before-lsr",
      "range": [],
      "description": "Loop strength reduction (LSR) optimization is performed only after induction variable users are sorted. This prevents binary assembly inconsistency during multiple compilations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mx,218,0x1",
      "range": [],
      "description": "BiSheng compiler enables inlining string comparison in the flang2 phase. After inlining, a function call becomes a simple for-loop character comparison, which can be further optimized in the LLVM. The inlining function is disabled by default. You can use this option to enable the inlining function.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-foverflow-shift-alt-behavior",
      "range": [],
      "description": "For undefined shift behavior that exceeds the bit width of the integer data type, for example, (int) a << 40, BiSheng compiler optimizes the expression to an integer constant in advance to prevent the expression from being identified and optimized as different values in different optimizations. This option is disabled by default.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-E",
      "range": [],
      "description": "Run the preprocessor stage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fsyntax-only",
      "range": [],
      "description": "Run the preprocessor, parser and type checking stages.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-S",
      "range": [],
      "description": "Run the previous stages as well as LLVM generation and optimization stages and target-specific code generation, producing an assembly file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-c",
      "range": [],
      "description": "Run all of the above, plus the assembler, generating a target '.o' object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-x",
      "range": [],
      "description": "Treat subsequent input files as having type language.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-std",
      "range": [
        "c89",
        "c90",
        "iso9899:1990",
        "iso9899:199409",
        "gnu89",
        "gnu90",
        "iso9899:1999",
        "gnu99",
        "c11",
        "iso9899:2011",
        "gnu11",
        "c17",
        "iso9899:2017",
        "gnu17",
        "c++98",
        "c++03",
        "gnu++98",
        "gnu++03",
        "c++11",
        "gnu++11",
        "c++14",
        "gnu++14",
        "c++17",
        "gnu++17",
        "c++20",
        "gnu++20",
        "c++2b",
        "gnu++2b",
        "cl1.0",
        "cl1.1",
        "cl1.2",
        "cl2.0",
        "cuda"
      ],
      "description": "Specify the language standard to compile for. The default C language standard is gnu17, except on PS4, where it is gnu99. The default C++ language standard is gnu++17. The default OpenCL language standard is cl1.0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-stdlib",
      "range": [
        "libstdc++",
        "libc++"
      ],
      "description": "Specify the C++ standard library to use; supported options are libstdc++ and libc++. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-rtlib",
      "range": [
        "libgcc",
        "compiler-rt"
      ],
      "description": "Specify the compiler runtime library to use; supported options are libgcc and compiler-rt. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ansi",
      "range": [],
      "description": "Same as -std=c89.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ObjC",
      "range": [],
      "description": "Treat source input files as Objective-C inputs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ObjC++",
      "range": [],
      "description": "Treat source input files as Objective-C++ inputs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-trigraphs",
      "range": [],
      "description": "Enable trigraphs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ffreestanding",
      "range": [],
      "description": "Indicate that the file should be compiled for a freestanding, not a hosted, environment. Note that it is assumed that a freestanding environment will additionally provide memcpy, memmove, memset and memcmp implementations, as these are needed for efficient codegen for many programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-builtin",
      "range": [],
      "description": "Disable special handling and optimizations of builtin functions like strlen() and malloc().",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fmath-errno",
      "range": [],
      "description": "Indicate that math functions should be treated as updating errno.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fpascal-strings",
      "range": [],
      "description": "Enable support for Pascal-style strings with '\\pfoo'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fms-extensions",
      "range": [],
      "description": "Enable support for Microsoft extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fmsc-version",
      "range": [],
      "description": "Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fborland-extensions",
      "range": [],
      "description": "Enable support for Borland extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fwritable-strings",
      "range": [],
      "description": "Make all string literals default to writable. This disables uniquing of strings and other optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-flax-vector-conversions",
      "range": [
        "none",
        "integer",
        "all"
      ],
      "description": "Allow loose type checking rules for implicit vector conversions. Defaults to integer if unspecified.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-lax-vector-conversions",
      "range": [],
      "description": "Disallow loose type checking rules for implicit vector conversions. Equivalent to -flax-vector-conversions=none.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fblocks",
      "range": [],
      "description": "Enable the 'Blocks' language feature.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fobjc-abi-version",
      "range": [
        "1",
        "2",
        "3"
      ],
      "description": "Select the Objective-C ABI version to use. Available versions are 1 (legacy 'fragile' ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fobjc-nonfragile-abi-version",
      "range": [],
      "description": "Select the Objective-C non-fragile ABI version to use by default. This will only be used as the Objective-C ABI when the non-fragile ABI is enabled (either via -fobjc-nonfragile-abi, or because it is the platform default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fobjc-nonfragile-abi",
      "range": [],
      "description": "Enable use of the Objective-C non-fragile ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-objc-nonfragile-abi",
      "range": [],
      "description": "Disable use of the Objective-C non-fragile ABI on platforms for which it is the default ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-arch",
      "range": [],
      "description": "Specify the architecture to build for.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-mmacosx-version-min",
      "range": [],
      "description": "When building for macOS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-miphoneos-version-min",
      "range": [],
      "description": "When building for iPhone OS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-mcpu",
      "range": [],
      "description": "Specify the target processor to generate code for. When given as -mcpu=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-mtune",
      "range": [],
      "description": "Specify the target processor to tune code for. When given as -mtune=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-march",
      "range": [],
      "description": "Specify that Clang should generate code for a specific processor family member and later. For example, if you specify -march=i486, the compiler is allowed to generate instructions that are valid on i486 and later processors, but which may not exist on earlier ones.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-g",
      "range": [],
      "description": "Generate debug information.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-gline-tables-only",
      "range": [],
      "description": "Generate only line table debug information. This allows for symbolicated backtraces with inlining information, but does not include any information about variables, their locations or types.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-gmodules",
      "range": [],
      "description": "Generate debug information that contains external references to types defined in Clang modules or precompiled headers instead of emitting redundant debug type information into every object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fstandalone-debug",
      "range": [],
      "description": "Clang supports a number of optimizations to reduce the size of debug information in the binary. This option turns off these optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-standalone-debug",
      "range": [],
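      "description": "Allow Clang to apply optimizations that reduce the size of debug information in the binary (the opposite of -fstandalone-debug). On Darwin -fstandalone-debug is enabled by default.",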
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-feliminate-unused-debug-types",
      "range": [],
      "description": "By default, Clang does not emit type information for types that are defined but not used in a program. To retain the debug info for these unused types, the negation -fno-eliminate-unused-debug-types can be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fexceptions",
      "range": [],
      "description": "Enable generation of unwind information. This allows exceptions to be thrown through Clang compiled stack frames. This is on by default in x86-64.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ftrapv",
      "range": [],
      "description": "Generate code to catch integer overflow errors. Signed integer overflow is undefined in C. With this flag, extra code is generated to detect this and abort when it happens.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fvisibility",
      "range": [],
      "description": "This flag sets the default visibility level.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fcommon",
      "range": [],
      "description": "This flag specifies that variables without initializers get common linkage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-common",
      "range": [],
      "description": "This flag specifies that variables without initializers do not get common linkage. It is the negation of -fcommon.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ftls-model",
      "range": [
        "global-dynamic",
        "local-dynamic",
        "initial-exec",
        "local-exec"
      ],
      "description": "Set the default thread-local storage (TLS) model to use for thread-local variables. Valid values are: 'global-dynamic', 'local-dynamic', 'initial-exec' and 'local-exec'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-flto",
      "range": [
        "full",
        "thin"
      ],
      "description": "Generate output files in LLVM formats, suitable for link time optimization. When used with -S this generates LLVM intermediate language assembly files, otherwise this generates LLVM bitcode format object files (which may be passed to the linker depending on the stage selection options).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-emit-llvm",
      "range": [],
      "description": "Generate output files in LLVM formats, suitable for link time optimization.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Qunused-arguments",
      "range": [],
      "description": "Do not emit any warnings for unused driver arguments.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Wa",
      "range": [],
      "description": "Pass the comma separated arguments in args to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Wl",
      "range": [],
      "description": "Pass the comma separated arguments in args to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Wp",
      "range": [],
      "description": "Pass the comma separated arguments in args to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Xanalyzer",
      "range": [],
      "description": "Pass arg to the static analyzer.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Xassembler",
      "range": [],
      "description": "Pass arg to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Xlinker",
      "range": [],
      "description": "Pass arg to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Xpreprocessor",
      "range": [],
      "description": "Pass arg to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-o",
      "range": [],
      "description": "Write output to file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-print-file-name",
      "range": [],
      "description": "Print the full library path of file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-print-libgcc-file-name",
      "range": [],
      "description": "Print the library path for the currently used compiler runtime library ('libgcc.a' or 'libclang_rt.builtins.*.a').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-print-prog-name",
      "range": [],
      "description": "Print the full program path of name.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-print-search-dirs",
      "range": [],
      "description": "Print the paths used for finding libraries and programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-save-temps",
      "range": [],
      "description": "Save intermediate compilation results.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-save-stats",
      "range": [
        "cwd",
        "obj"
      ],
      "description": "Save internal code generation (LLVM) statistics to a file in the current directory (-save-stats/'-save-stats=cwd') or the directory of the output file ('-save-stats=obj').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-integrated-as",
      "range": [],
      "description": "Enable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-no-integrated-as",
      "range": [],
      "description": "Disable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-time",
      "range": [],
      "description": "Time individual commands.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-ftime-report",
      "range": [],
      "description": "Print timing summary of each stage of compilation.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-v",
      "range": [],
      "description": "Show commands to run and use verbose output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fshow-column",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fshow-source-location",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fcaret-diagnostics",
      "range": [],
      "description": "The line of source code that the issue occurs on, along with a caret and ranges that indicate the important locations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdiagnostics-fixit-info",
      "range": [],
      "description": "'FixIt' information, which is a concise explanation of how to fix the problem (when Clang is certain it knows).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdiagnostics-parseable-fixits",
      "range": [],
      "description": "Print fix-its in machine parseable form.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdiagnostics-print-source-range-info",
      "range": [],
      "description": "A machine-parsable representation of the ranges involved (off by default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdiagnostics-show-option",
      "range": [],
      "description": "An option that indicates how to control the diagnostic (for diagnostics that support it).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-nostdinc",
      "range": [],
      "description": "Do not search the standard system directories or compiler builtin directories for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-nostdlibinc",
      "range": [],
      "description": "Do not search the standard system directories for include files, but do search compiler builtin include directories.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-nobuiltininc",
      "range": [],
      "description": "Do not search clang's builtin directory for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O0",
      "range": [],
      "description": "Means 'no optimization': this level compiles the fastest and generates the most debuggable code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O1",
      "range": [],
      "description": "Somewhere between -O0 and -O2.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O2",
      "range": [],
      "description": "Moderate level of optimization which enables most optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O3",
      "range": [],
      "description": "Like -O2, except that it enables optimizations that take longer to perform or that may generate larger code (in an attempt to make the program run faster).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Ofast",
      "range": [],
      "description": "Enables all the optimizations from -O3 along with other aggressive optimizations that may violate strict compliance with language standards.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Os",
      "range": [],
      "description": "Like -O2 with extra optimizations to reduce code size.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Oz",
      "range": [],
      "description": "Like -Os (and thus -O2), but reduces code size further.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Og",
      "range": [],
      "description": "Like -O1. In future versions, this option might disable different optimizations in order to improve debuggability.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O",
      "range": [],
      "description": "Equivalent to -O1.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-O4",
      "range": [],
      "description": "-O4 and higher levels are currently equivalent to -O3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-noFlangLibs",
      "range": [],
      "description": "Do not link against Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-mp",
      "range": [],
      "description": "Enable OpenMP and link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-nomp",
      "range": [],
      "description": "Do not link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mbackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like any other character.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mnobackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like a C-style escape character (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mbyteswapio",
      "range": [],
      "description": "Swap byte-order for unformatted input/output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mfixed",
      "range": [],
      "description": "Assume fixed-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mextend",
      "range": [],
      "description": "Allow source lines up to 132 characters.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mfreeform",
      "range": [],
      "description": "Assume free-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mpreprocess",
      "range": [],
      "description": "Run preprocessor for Fortran files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mrecursive",
      "range": [],
      "description": "Generate code to allow recursive subprograms.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mstandard",
      "range": [],
      "description": "Check standard conformance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Msave",
      "range": [],
      "description": "Assume all variables have SAVE attribute.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-module",
      "range": [],
      "description": "Path to module file (-I also works).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mallocatable",
      "range": [],
      "description": "Select Fortran 03 semantics for assignments to allocatable objects (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-static-flang-libs",
      "range": [],
      "description": "Link using static Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-M[no]daz",
      "range": [],
      "description": "Treat denormalized numbers as zero.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-M[no]flushz",
      "range": [],
      "description": "Set SSE to flush-to-zero mode.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-Mcache_align",
      "range": [],
      "description": "Align large objects on cache-line boundaries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-M[no]fprelaxed",
      "range": [],
      "description": "This option is ignored.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdefault-integer-8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fdefault-real-8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-i8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-r8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.4.0",
      "name": "-fno-fortran-main",
      "range": [],
      "description": "Don't link in Fortran main.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mflushz",
      "range": [],
      "description": "This option indicates whether to flush denormalized floating-point values to zero and is different from other unsafe floating-point optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ffp-contract",
      "range": [],
      "description": "The value of this option can be off, on, or fast. BiSheng compiler sets the value to fast by default to enable floating-point multiply-add operations and combine multiplication and addition into one operation, improving operation performance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-faarch64-pow-alt-precision",
      "range": [
        "18",
        "21"
      ],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the pow function so that the computing result of the pow function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-faarch64-minmax-alt-precision",
      "range": [],
      "description": "This Flang option is valid only for Fortran code. It is used to change the optimization policy of the min or max function so that the computing result of the min or max function is the same as that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-disable-sincos-opt",
      "range": [],
      "description": "This llvm option is used to change the optimization policies of the sin or cos function so that the computing result of the sin or cos function is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aarch64-recip-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the recip reciprocal instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aarch64-rsqrt-alt-precision",
      "range": [],
      "description": "This llc option uses the soft floating-point compensation to ensure that the computing result of the rsqrt reciprocal square root extraction instruction is the same as that on the non-ARM platform. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-alt-precision-math-functions",
      "range": [],
      "description": "This llvm option is used to replace names of the math functions __mth_i_cosd, __mth_i_asind, and __pd_powi_1 with cosdf, asindf, and powr8i4 to control their precision. (This option must be used together with the KML math library.) This option takes effect only for O1 or higher optimization levels. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-18-math-compatibility",
      "range": [],
      "description": "This llvm option is used to convert math functions such as tgammaf, cbrt, log, and log10 to functions suffixed with _18 to control their precision. (This option must be used together with the KML math library.) This option takes effect only when the optimization level is higher than or equal to O1 and -mllvm -enable-alt-precision-math-functions is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ffp-compatibility",
      "range": [
        "17",
        "18",
        "21"
      ],
      "description": "This general option is used to control all options that need to be enabled to ensure that the calculation result is consistent with that on the non-ARM platform.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ffma-combine-fdiv",
      "range": [],
      "description": "This general option is used to optimize the expression a/b+c to fma(a, 1/b, c), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ffma-reverse-associative",
      "range": [],
      "description": "This general option is used to optimize the expression a*b+c*d to fma(a, b, c*d), which ensures that the calculation result is consistent with that on the non-ARM platform. This parameter is valid only when -ffp-contract is set to fast.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Hx,124,0xc00000",
      "range": [],
      "description": "This Flang option is used to keep the rounding mode of constant initialization consistent with that on the non-ARM computing platform. This option is valid only for Fortran.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-force-customized-pipeline",
      "range": [
        "true",
        "false"
      ],
      "description": "This option forcibly uses the customized pass pipeline. The value true indicates that the optimization is enabled. By default, the optimization is disabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-sad-pattern-recognition",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the absolute value summation operation for differences (sum += abs(a[i] - b[i])) to generate a more simplified and efficient operation sequence. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-instcombine-ctz-array",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the calculation for De Bruijn sequence table lookup. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aarch64-loopcond-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option reduces unnecessary instructions for loop condition judgment under some conditions to optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aarch64-hadd-generation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option uses only one ARM NEON instruction URHADD to complete the vectorized operation (x[i] + y[i] + 1) >> 1 and optimize the code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-loop-split",
      "range": [
        "true",
        "false"
      ],
      "description": "This option splits a loop meeting specific conditions into multiple loops to facilitate the reduction of unnecessary loops. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-mem-chk-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the logic of runtime checks generated for LLVM loop vectorization and improves loop vectorization code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aarch64-ldp-stp-noq",
      "range": [
        "true",
        "false"
      ],
      "description": "This option prohibits the generation of 'stp/ldp q1, q2, addr' instructions, whose performance is not ideal. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-func-arg-analysis",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances LLVM range analysis to adapt LLVM function specialization optimization to more functions. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ipsccp-enable-function-specialization",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enhances the function specialization optimization to adapt the function specialization optimization to functions with function pointers. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-modest-vectorization-unrolling-factors",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies vectorization for loops with a smaller step. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-instcombine-shrink-vector-element",
      "range": [
        "true",
        "false"
      ],
      "description": "This option improves the degree of parallelism (DOP) of vectorized instructions and eliminates the scalar median value generated during vectorization, improving the effect of loop vectorization. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-instcombine-reorder-sum-of-reduce-add",
      "range": [
        "true",
        "false"
      ],
      "description": "This option changes the sequence of reduction operations to improve the reduction code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-replace-fortran-mem-alloc",
      "range": [
        "true",
        "false"
      ],
      "description": "This option allocates stack memory, instead of heap memory, to improve performance when a memory allocation operation of known size (such as arrays) is required in Fortran code. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-pg-math-call-simplification",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies calls to multiple Fortran math library functions to improve call performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-instcombine-gep-common",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the element address calculation for multi-dimensional arrays in complex scenarios (such as nested loops) to reduce the register pressure and improve program performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-sroa-after-unroll",
      "range": [
        "true",
        "false"
      ],
      "description": "This option enables the function of adding SROA after loop unrolling to reduce memory access operations and store variables in the register. The value true indicates that the optimization is enabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-disable-recursive-bonus",
      "range": [
        "true",
        "false"
      ],
      "description": "This option makes function calls inside a recursive function easier to inline, improving the performance of frequently called recursive functions. The value true indicates that the inline operation is disabled. The default value is false, indicating that the inline operation is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-disable-recip-sqrt-opt",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the formats of A = (C / sqrt(Y)) and B = A * A in FastMath scenarios to reduce the number of instructions. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-disable-loop-aware-reassociation",
      "range": [
        "true",
        "false"
      ],
      "description": "This option adds loop awareness to Reassociate Pass to limit some operations within the loop, preventing performance deterioration caused by the increase of instructions in the loop. The value true indicates that the optimization is disabled. The default value is false, indicating that the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-gzipcrc32",
      "range": [
        "true",
        "false"
      ],
      "description": "This option identifies the CRC32 calculation logic in the code and uses the built-in instructions of the processor to accelerate the calculation. If this option is set to true, the optimization is enabled. If this option is set to false, the optimization is disabled. By default, the optimization is enabled. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Hx,70,0x20000000",
      "range": [],
      "description": "In O1, O2, and O3, BiSheng compiler enables minloc and maxloc inlining in the flang1 phase. After inlining, the functions can be called simply using for loops, which facilitates further optimization in LLVM. This option can disable inlining, which is the same as O0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-update-iv-scev",
      "range": [],
      "description": "This option updates the SCEV analysis result in the induction variable users pass to expose more optimization opportunities. This option is enabled by default, which may increase the compilation duration. If you have high requirements on the compilation duration, you can set -mllvm -update-iv-scev to false. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-gep-common",
      "range": [],
      "description": "This option generates a common parent for GEP clusters that originate from the same instruction by removing add instructions (that are used as indexes). \\r\\n-mllvm -gep-common=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -gep-cluster-min=<Int number> indicates the GEP cluster threshold. The default value is 3. \\r\\n-mllvm -gep-loop-mindepth=<Int number> indicates the loop threshold. The default value is 3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-array-restructuring",
      "range": [],
      "description": "This option optimizes the memory access mode of one or more arrays in a program and rearranges arrays to reduce the running time. \\r\\n-mllvm -enable-array-restructuring=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -skip-array-restructuring-codegen=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-struct-peel",
      "range": [],
      "description": "This option optimizes structure peeling and increases the local cache when the structure fields in a structure array are accessed, reducing the running time. \\r\\n-mllvm -enable-struct-peel=<true|false> indicates whether to enable the optimization. If the value is set to true, the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -struct-peel-skip-transform=<true|false> indicates whether to disable the code generation part of the optimization pass. If the value is set to true, the code generation part of the optimization pass is disabled. The default value is false. \\r\\n-mllvm -struct-peel-this=... indicates forcibly peeling a structure defined by the user (subject to legality).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fopenmp-reduction-duplicate",
      "range": [],
      "description": "This option enhances the vectorization capability in the OpenMP reduction scenario. This takes effect at the AArch64 backend and C and C++ frontends only when -fopenmp is enabled. This option is enabled by default. You can run -fno-openmp-reduction-duplicate to disable it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fopenmp-firstprivatize-locals",
      "range": [],
      "description": "This option enhances the vectorization capability in the OpenMP firstprivatize scenario. This takes effect at the AArch64 backend and C and C++ frontends only when -fopenmp is enabled. This option is enabled by default. You can run -fno-openmp-firstprivatize-locals to disable it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-sort-ivusers-before-lsr",
      "range": [],
      "description": "Loop strength reduction (LSR) optimization is performed only after induction variable users are sorted. This prevents binary assembly inconsistency during multiple compilations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mx,218,0x1",
      "range": [],
      "description": "BiSheng compiler enables inlining string comparison in the flang2 phase. After inlining, a function call becomes a simple for-loop character comparison, which can be further optimized in the LLVM. The inlining function is disabled by default. You can use this option to enable the inlining function.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-foverflow-shift-alt-behavior",
      "range": [],
      "description": "For undefined shift behavior that exceeds the bit width of the integer data type, for example, (int) a << 40, BiSheng compiler optimizes the expression to an integer constant in advance to prevent the expression from being identified and optimized as different values in different optimizations. This option is disabled by default.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-instcombine-simplify-mul64",
      "range": [
        "true",
        "false"
      ],
      "description": "This option simplifies the algorithm of multiplying two 64-bit operands to output a 128-bit data into a more efficient instruction. The value true indicates that the optimization is enabled. By default, the optimization is enabled. Currently, the C and C++ languages and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-replace-sqrt-compare-by-square",
      "range": [
        "true",
        "false"
      ],
      "description": "This option converts the comparison condition by replacing the square root calculation in the comparison condition with the square calculation of its result. This optimization is enabled only when -ffast-math is enabled. The value true indicates that the optimization is enabled. By default, the optimization is disabled. Currently, the C and C++ languages and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-combine-sqrt-exp",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes sqrt(exp(x)) to exp(x*0.5) to eliminate sqrt calculations with high execution costs. This optimization is enabled only when -ffast-math is enabled. The value true indicates that the optimization is enabled. By default, the optimization is enabled. Currently, the C and C++ languages and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-loop-load-widen-patterns",
      "range": [
        "0",
        "1",
        "2"
      ],
      "description": "This option optimizes scenarios where some data can be accessed using a wider data type. Currently, three scenarios are supported, and the IDs are 0, 1, and 2, respectively. Use commas (,) to separate them. By default, the optimization is disabled. Currently, the C language and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-enable-aggressive-inline",
      "range": [
        "true",
        "false"
      ],
      "description": "This option does not consider the __attribute__((noinline)) restriction in the source code and forcibly regards the function as a common function to determine whether to perform inline optimization. The value true indicates that the optimization is enabled. By default, the optimization is disabled. Currently, the C and C++ languages and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-shift-rounding",
      "range": [],
      "description": "This option matches a rounding scenario and selects an appropriate instruction to reduce the running time. \\r\\n-mllvm -aarch64-optimize-rounding=<true|false> controls the optimization. The value true indicates that the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -aarch64-optimize-rounding-saturation=<true|false> determines whether to optimize the SQRSHRUN/UQRSHRN scenario. The value true indicates that the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -aarch64-extract-vector-element-trunc-combine=<true|false> provides better instruction selection in some scenarios. The value true indicates that the optimization is enabled. By default, the optimization is enabled. \\r\\n-mllvm -aarch64-rounding-search-max-depth=<integer> sets the search depth of a rounding scenario. The default value is 4.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-aggressive-instcombine-simplify-sqr64",
      "range": [
        "true",
        "false"
      ],
      "description": "This option optimizes the 64-bit SQR operation to use simplified instructions to improve performance. The value true indicates that the optimization is enabled. By default, the optimization is enabled. Currently, the C and C++ languages and the AArch64 backend are supported. To use this option, add -mllvm before it.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-E",
      "range": [],
      "description": "Run the preprocessor stage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fsyntax-only",
      "range": [],
      "description": "Run the preprocessor, parser and type checking stages.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-S",
      "range": [],
      "description": "Run the previous stages as well as LLVM generation and optimization stages and target-specific code generation, producing an assembly file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-c",
      "range": [],
      "description": "Run all of the above, plus the assembler, generating a target '.o' object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-x",
      "range": [],
      "description": "Treat subsequent input files as having type language.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-std",
      "range": [
        "c89",
        "c90",
        "iso9899:1990",
        "iso9899:199409",
        "gnu89",
        "gnu90",
        "iso9899:1999",
        "gnu99",
        "c11",
        "iso9899:2011",
        "gnu11",
        "c17",
        "iso9899:2017",
        "gnu17",
        "c++98",
        "c++03",
        "gnu++98",
        "gnu++03",
        "c++11",
        "gnu++11",
        "c++14",
        "gnu++14",
        "c++17",
        "gnu++17",
        "c++20",
        "gnu++20",
        "c++2b",
        "gnu++2b",
        "cl1.0",
        "cl1.1",
        "cl1.2",
        "cl2.0",
        "cuda"
      ],
      "description": "Specify the language standard to compile for. The default C language standard is gnu17, except on PS4, where it is gnu99. The default C++ language standard is gnu++17. The default OpenCL language standard is cl1.0.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-stdlib",
      "range": [
        "libstdc++",
        "libc++"
      ],
      "description": "Specify the C++ standard library to use; supported options are libstdc++ and libc++. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-rtlib",
      "range": [
        "libgcc",
        "compiler-rt"
      ],
      "description": "Specify the compiler runtime library to use; supported options are libgcc and compiler-rt. If not specified, platform default will be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ansi",
      "range": [],
      "description": "Same as -std=c89.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ObjC",
      "range": [],
      "description": "Treat source input files as Objective-C inputs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ObjC++",
      "range": [],
      "description": "Treat source input files as Objective-C++ inputs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-trigraphs",
      "range": [],
      "description": "Enable trigraphs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ffreestanding",
      "range": [],
      "description": "Indicate that the file should be compiled for a freestanding, not a hosted, environment. Note that it is assumed that a freestanding environment will additionally provide memcpy, memmove, memset and memcmp implementations, as these are needed for efficient codegen for many programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-builtin",
      "range": [],
      "description": "Disable special handling and optimizations of builtin functions like strlen() and malloc().",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fmath-errno",
      "range": [],
      "description": "Indicate that math functions should be treated as updating errno.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fpascal-strings",
      "range": [],
      "description": "Enable support for Pascal-style strings with '\\pfoo'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fms-extensions",
      "range": [],
      "description": "Enable support for Microsoft extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fmsc-version",
      "range": [],
      "description": "Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fborland-extensions",
      "range": [],
      "description": "Enable support for Borland extensions.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fwritable-strings",
      "range": [],
      "description": "Make all string literals default to writable. This disables uniquing of strings and other optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-flax-vector-conversions",
      "range": [
        "none",
        "integer",
        "all"
      ],
      "description": "Allow loose type checking rules for implicit vector conversions. Defaults to integer if unspecified.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-lax-vector-conversions",
      "range": [],
      "description": "Disallow loose type checking rules for implicit vector conversions. Equivalent to -flax-vector-conversions=none.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fblocks",
      "range": [],
      "description": "Enable the 'Blocks' language feature.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fobjc-abi-version",
      "range": [
        "1",
        "2",
        "3"
      ],
      "description": "Select the Objective-C ABI version to use. Available versions are 1 (legacy 'fragile' ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fobjc-nonfragile-abi-version",
      "range": [],
      "description": "Select the Objective-C non-fragile ABI version to use by default. This will only be used as the Objective-C ABI when the non-fragile ABI is enabled (either via -fobjc-nonfragile-abi, or because it is the platform default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fobjc-nonfragile-abi",
      "range": [],
      "description": "Enable use of the Objective-C non-fragile ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-objc-nonfragile-abi",
      "range": [],
      "description": "Disable use of the Objective-C non-fragile ABI. This is useful on platforms for which the non-fragile ABI is the default ABI.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-arch",
      "range": [],
      "description": "Specify the architecture to build for.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-mmacosx-version-min",
      "range": [],
      "description": "When building for macOS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-miphoneos-version-min",
      "range": [],
      "description": "When building for iPhone OS, specify the minimum version supported by your application.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-mcpu",
      "range": [],
      "description": "When specified as -mcpu=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-mtune",
      "range": [],
      "description": "When specified as -mtune=?, acts as an alias for --print-supported-cpus.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-march",
      "range": [],
      "description": "Specify that Clang should generate code for a specific processor family member and later. For example, if you specify -march=i486, the compiler is allowed to generate instructions that are valid on i486 and later processors, but which may not exist on earlier ones.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-g",
      "range": [],
      "description": "Generate debug information.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-gline-tables-only",
      "range": [],
      "description": "Generate only line table debug information. This allows for symbolicated backtraces with inlining information, but does not include any information about variables, their locations or types.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-gmodules",
      "range": [],
      "description": "Generate debug information that contains external references to types defined in Clang modules or precompiled headers instead of emitting redundant debug type information into every object file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fstandalone-debug",
      "range": [],
      "description": "Clang supports a number of optimizations to reduce the size of debug information in the binary.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-standalone-debug",
      "range": [],
      "description": "On Darwin -fstandalone-debug is enabled by default.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-feliminate-unused-debug-types",
      "range": [],
      "description": "By default, Clang does not emit type information for types that are defined but not used in a program. To retain the debug info for these unused types, the negation -fno-eliminate-unused-debug-types can be used.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fexceptions",
      "range": [],
      "description": "Enable generation of unwind information. This allows exceptions to be thrown through Clang compiled stack frames. This is on by default in x86-64.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ftrapv",
      "range": [],
      "description": "Generate code to catch integer overflow errors. Signed integer overflow is undefined in C. With this flag, extra code is generated to detect this and abort when it happens.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fvisibility",
      "range": [],
      "description": "This flag sets the default visibility level.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fcommon",
      "range": [],
      "description": "This flag specifies that variables without initializers get common linkage.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-common",
      "range": [],
      "description": "This flag specifies that variables without initializers do not get common linkage. It is the negation of -fcommon.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ftls-model",
      "range": [
        "global-dynamic",
        "local-dynamic",
        "initial-exec",
        "local-exec"
      ],
      "description": "Set the default thread-local storage (TLS) model to use for thread-local variables. Valid values are: 'global-dynamic', 'local-dynamic', 'initial-exec' and 'local-exec'.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-flto",
      "range": [
        "full",
        "thin"
      ],
      "description": "Generate output files in LLVM formats, suitable for link time optimization. When used with -S this generates LLVM intermediate language assembly files, otherwise this generates LLVM bitcode format object files (which may be passed to the linker depending on the stage selection options).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-emit-llvm",
      "range": [],
      "description": "Generate output files in LLVM formats, suitable for link time optimization.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Qunused-arguments",
      "range": [],
      "description": "Do not emit any warnings for unused driver arguments.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Wa",
      "range": [],
      "description": "Pass the comma separated arguments in args to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Wl",
      "range": [],
      "description": "Pass the comma separated arguments in args to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Wp",
      "range": [],
      "description": "Pass the comma separated arguments in args to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Xanalyzer",
      "range": [],
      "description": "Pass arg to the static analyzer.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Xassembler",
      "range": [],
      "description": "Pass arg to the assembler.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Xlinker",
      "range": [],
      "description": "Pass arg to the linker.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Xpreprocessor",
      "range": [],
      "description": "Pass arg to the preprocessor.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-o",
      "range": [],
      "description": "Write output to file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-print-file-name",
      "range": [],
      "description": "Print the full library path of file.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-print-libgcc-file-name",
      "range": [],
      "description": "Print the library path for the currently used compiler runtime library ('libgcc.a' or 'libclang_rt.builtins.*.a').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-print-prog-name",
      "range": [],
      "description": "Print the full program path of name.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-print-search-dirs",
      "range": [],
      "description": "Print the paths used for finding libraries and programs.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-save-temps",
      "range": [],
      "description": "Save intermediate compilation results.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-save-stats",
      "range": [
        "cwd",
        "obj"
      ],
      "description": "Save internal code generation (LLVM) statistics to a file in the current directory ('-save-stats' or '-save-stats=cwd') or the directory of the output file ('-save-stats=obj').",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-integrated-as",
      "range": [],
      "description": "Enable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-no-integrated-as",
      "range": [],
      "description": "Disable the use of the integrated assembler. Whether the integrated assembler is on by default is target dependent.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-time",
      "range": [],
      "description": "Time individual commands.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-ftime-report",
      "range": [],
      "description": "Print timing summary of each stage of compilation.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-v",
      "range": [],
      "description": "Show commands to run and use verbose output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fshow-column",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fshow-source-location",
      "range": [],
      "description": "A file/line/column indicator that shows exactly where the diagnostic occurs in your code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fcaret-diagnostics",
      "range": [],
      "description": "The line of source code that the issue occurs on, along with a caret and ranges that indicate the important locations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdiagnostics-fixit-info",
      "range": [],
      "description": "'FixIt' information, which is a concise explanation of how to fix the problem (when Clang is certain it knows).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdiagnostics-parseable-fixits",
      "range": [],
      "description": "Print fix-its in machine parseable form",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdiagnostics-print-source-range-info",
      "range": [],
      "description": "A machine-parsable representation of the ranges involved (off by default)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdiagnostics-show-option",
      "range": [],
      "description": "An option that indicates how to control the diagnostic (for diagnostics that support it)",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-nostdinc",
      "range": [],
      "description": "Do not search the standard system directories or compiler builtin directories for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-nostdlibinc",
      "range": [],
      "description": "Do not search the standard system directories for include files, but do search compiler builtin include directories.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-nobuiltininc",
      "range": [],
      "description": "Do not search clang's builtin directory for include files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O0",
      "range": [],
      "description": "Means 'no optimization': this level compiles the fastest and generates the most debuggable code.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O1",
      "range": [],
      "description": "Somewhere between -O0 and -O2.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O2",
      "range": [],
      "description": "Moderate level of optimization which enables most optimizations.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O3",
      "range": [],
      "description": "Like -O2, except that it enables optimizations that take longer to perform or that may generate larger code (in an attempt to make the program run faster).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Ofast",
      "range": [],
      "description": "Enables all the optimizations from -O3 along with other aggressive optimizations that may violate strict compliance with language standards.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Os",
      "range": [],
      "description": "Like -O2 with extra optimizations to reduce code size.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Oz",
      "range": [],
      "description": "Like -Os (and thus -O2), but reduces code size further.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Og",
      "range": [],
      "description": "Like -O1. In future versions, this option might disable different optimizations in order to improve debuggability.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O",
      "range": [],
      "description": "Equivalent to -O1.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-O4",
      "range": [],
      "description": "-O4 and higher levels are currently equivalent to -O3.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-noFlangLibs",
      "range": [],
      "description": "Do not link against Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-mp",
      "range": [],
      "description": "Enable OpenMP and link with the OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-nomp",
      "range": [],
      "description": "Do not link with OpenMP library libomp.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mbackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like any other character.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mnobackslash",
      "range": [],
      "description": "Treat backslash in quoted strings like a C-style escape character (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mbyteswapio",
      "range": [],
      "description": "Swap byte-order for unformatted input/output.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mfixed",
      "range": [],
      "description": "Assume fixed-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mextend",
      "range": [],
      "description": "Allow source lines up to 132 characters.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mfreeform",
      "range": [],
      "description": "Assume free-format source.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mpreprocess",
      "range": [],
      "description": "Run preprocessor for Fortran files.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mrecursive",
      "range": [],
      "description": "Generate code to allow recursive subprograms.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mstandard",
      "range": [],
      "description": "Check standard conformance.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Msave",
      "range": [],
      "description": "Assume all variables have SAVE attribute.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-module",
      "range": [],
      "description": "Path to module file (-I also works).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mallocatable",
      "range": [],
      "description": "Select Fortran 03 semantics for assignments to allocatable objects (Default).",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-static-flang-libs",
      "range": [],
      "description": "Link using static Flang libraries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-M[no]daz",
      "range": [],
      "description": "Treat denormalized numbers as zero.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-M[no]flushz",
      "range": [],
      "description": "Set SSE to flush-to-zero mode.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-Mcache_align",
      "range": [],
      "description": "Align large objects on cache-line boundaries.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-M[no]fprelaxed",
      "range": [],
      "description": "This option is ignored.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdefault-integer-8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fdefault-real-8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-i8",
      "range": [],
      "description": "Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-r8",
      "range": [],
      "description": "Treat REAL as REAL*8.",
      "compileName": "BiSheng Compiler"
    },
    {
      "gcc_version": "2.5.0",
      "name": "-fno-fortran-main",
      "range": [],
      "description": "Don't link in Fortran main.",
      "compileName": "BiSheng Compiler"
    }
  ],
  "TeeSecurityFunction": [
    {
      "name": "TEEC_InitializeContext",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "TEEC_Result TEEC_InitializeContext(const char* name, TEEC_Context* context)",
      "description": "This function initializes a new TEE Context, forming a connection between this Client Application and the TEE identified by the string identifier name.",
      "parameters": [
        "name: a zero-terminated string that describes the TEE to connect to.",
        "context: a TEEC_Context structure that MUST be initialized by the Implementation."
      ],
      "return": [
        "0x00000000: the initialization was successful.",
        "0xFFFF0000 - 0xFFFF0010: initialization was not successful."
      ]
    },
    {
      "name": "TEEC_FinalizeContext",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "void TEEC_FinalizeContext(TEEC_Context* context)",
      "description": "This function finalizes an initialized TEE Context, closing the connection between the Client Application and the TEE.",
      "parameters": [
        "context: an initialized TEEC_Context structure which is to be finalized."
      ],
      "return": []
    },
    {
      "name": "TEEC_RegisterSharedMemory",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "TEEC_Result TEEC_RegisterSharedMemory(TEEC_Context* context, TEEC_SharedMemory* sharedMem)",
      "description": "This function registers a block of existing Client Application memory as a block of Shared Memory within the scope of the specified TEE Context, in accordance with the parameters which have been set by the Client Application inside the sharedMem structure.",
      "parameters": [
        "context: a pointer to an initialized TEE Context",
        "sharedMem: a pointer to a Shared Memory structure to register."
      ],
      "return": [
        "0x00000000: the registration was successful.",
        "0xFFFF0000 - 0xFFFF0010: registration was not successful."
      ]
    },
    {
      "name": "TEEC_AllocateSharedMemory",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "TEEC_Result TEEC_AllocateSharedMemory(TEEC_Context* context, TEEC_SharedMemory* sharedMem)",
      "description": "This function allocates a new block of memory as a block of Shared Memory within the scope of the specified TEE Context, in accordance with the parameters which have been set by the Client Application inside the sharedMem structure.",
      "parameters": [
        "context: a pointer to an initialized TEE Context.",
        "sharedMem: a pointer to a Shared Memory structure to allocate."
      ],
      "return": [
        "0x00000000: the allocation was successful.",
        "0xFFFF0000 - 0xFFFF0010: allocation was not successful."
      ]
    },
    {
      "name": "TEEC_ReleaseSharedMemory",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "void TEEC_ReleaseSharedMemory(TEEC_SharedMemory* sharedMem)",
      "description": "This function deregisters or deallocates a previously initialized block of Shared Memory.",
      "parameters": [
        "sharedMem: a pointer to a valid Shared Memory structure."
      ],
      "return": []
    },
    {
      "name": "TEEC_OpenSession",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "TEEC_Result TEEC_OpenSession(TEEC_Context* context, TEEC_Session* session, const TEEC_UUID* destination, uint32_t  connectionMethod, const void* connectionData, TEEC_Operation* operation, uint32_t* returnOrigin)",
      "description": "This function open a Session between the Client Application and the specified Trusted Application.",
      "parameters": [
        "context: a pointer to an initialized TEE Context.",
        "session: a pointer to a Session structure to open.",
        "destination: a pointer to a structure containing the UUID of the destination Trusted Application.",
        "connectionMethod: the method of connection to use.",
        "connectionData: any necessary data required to support the connection method chosen.",
        "operation: a pointer to an Operation containing a set of Parameters to exchange with the Trusted Application, or NULL if no Parameters are to be exchanged or if the operation cannot be cancelled.",
        "returnOrigin: a pointer to a variable which will contain the return origin."
      ],
      "return": [
        "0x00000000: the session was successfully opened.",
        "0xFFFF0000 - 0xFFFF0010: the session opening failed."
      ]
    },
    {
      "name": "TEEC_CloseSession",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "void TEEC_CloseSession(TEEC_Session* session)",
      "description": "This function closes a Session which has been opened with a Trusted Application.",
      "parameters": [
        "session: the session to close."
      ],
      "return": []
    },
    {
      "name": "TEEC_InvokeCommand",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "TEEC_Result TEEC_InvokeCommand(TEEC_Session* session, uint32_t commandID, TEEC_Operation* operation, uint32_t*  returnOrigin)",
      "description": "This function invokes a Command within the specified Session.",
      "parameters": [
        "session: the open Session in which the command will be invoked.",
        "commandID: the identifier of the Command within the Trusted Application to invoke.",
        "operation: a pointer to a Client Application initialized TEEC_Operation structure, or NULL if there is no payload to send or if the Command does not need to support cancellation.",
        "returnOrigin: a pointer to a variable which will contain the return origin."
      ],
      "return": [
        "0x00000000: the initialization was successful.",
        "0xFFFF0000 - 0xFFFF0010:  initialization was not successful."
      ]
    },
    {
      "name": "TEEC_RequestCancellation",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "void TEEC_RequestCancellation(TEEC_Operation* operation)",
      "description": "This function requests the cancellation of a pending open Session operation or a Command invocation operation.",
      "parameters": [
        "operation: a pointer to a Client Application instantiated Operation structure."
      ],
      "return": []
    },
    {
      "name": "TEEC_PARAM_TYPES",
      "headerfile_desc": "tee_client_api.h",
      "func_name": "uint32_t TEEC_PARAM_TYPES(param0Type, param1Type, param2Type, param3Type)",
      "description": "This function-like macro builds a constant containing four Parameter types for use in the paramTypes field of a TEEC_Operation structure.",
      "parameters": [],
      "return": []
    },
    {
      "name": "TEE_GetPropertyAsString",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsString(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [outstring] char* valueBuffer, size_t* valueBufferLen)",
      "description": "The TEE_GetPropertyAsString function performs a lookup in a property set to retrieve an individual property and convert its value into a printable string.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and it SHALL be encoded in UTF-8.",
        "valueBuffer, valueBufferLen: Output buffer for the property value."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_SHORT_BUFFER: If the value buffer is not large enough to hold the whole property value."
      ]
    },
    {
      "name": "TEE_GetPropertyAsBool",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsBool(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [out] bool* value)",
      "description": "The TEE_GetPropertyAsBool function retrieves a single property in a property set and converts its value to a Boolean.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "value: A pointer to the variable that will contain the value of the property on success or false on error."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property value is not defined as a Boolean."
      ]
    },
    {
      "name": "TEE_GetPropertyAsU32",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsU32(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [out] uint32_t* value)",
      "description": "The TEE_GetPropertyAsU32 function retrieves a single property in a property set and converts its value to a 32-bit unsigned integer.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "value: A pointer to the variable that will contain the value of the property on success, or zero on error."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property value is not defined as an unsigned 32-bit integer."
      ]
    },
    {
      "name": "TEE_GetPropertyAsU64",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsU64(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [out] uint64_t* value)",
      "description": "The TEE_GetPropertyAsU64 function retrieves a single property in a property set and converts its value to a 64-bit unsigned integer.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "value: A pointer to the variable that will contain the value of the property on success, or zero on error."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property value is not defined as an unsigned 64-bit integer."
      ]
    },
    {
      "name": "TEE_GetPropertyAsBinaryBlock",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsBinaryBlock(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [outbuf] void* valueBuffer, size_t* valueBufferLen)",
      "description": "The function TEE_GetPropertyAsBinaryBlock retrieves an individual property and converts its value into a binary block.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "valueBuffer, valueBufferLen: Output buffer for the binary block."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property cannot be retrieved as a binary block.",
        "TEE_ERROR_SHORT_BUFFER: If the value buffer is not large enough to hold the whole property value."
      ]
    },
    {
      "name": "TEE_GetPropertyAsUUID",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsUUID(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [out] TEE_UUID* value)",
      "description": "The function TEE_GetPropertyAsUUID retrieves an individual property and converts its value into a UUID.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "value: A pointer filled with the UUID. SHALL NOT be NULL."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property cannot be converted into a UUID."
      ]
    },
    {
      "name": "TEE_GetPropertyAsIdentity",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyAsIdentity(TEE_PropSetHandle propsetOrEnumerator, [instringopt] char* name, [out] TEE_Identity* value)",
      "description": "The function TEE_GetPropertyAsIdentity retrieves an individual property and converts its value into a TEE_Identity.",
      "parameters": [
        "propsetOrEnumerator: One of the TEE_PROPSET_XXX pseudo-handles or a handle on a property enumerator.",
        "name: A pointer to the zero-terminated string containing the name of the property to retrieve. Its content is case-sensitive and SHALL be encoded in UTF-8.",
        "value: A pointer filled with the identity. SHALL NOT be NULL."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the property is not found or if name is not a valid UTF-8 encoding.",
        "TEE_ERROR_BAD_FORMAT: If the property value cannot be converted into an Identity."
      ]
    },
    {
      "name": "TEE_AllocatePropertyEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AllocatePropertyEnumerator([out] TEE_PropSetHandle* enumerator)",
      "description": "The function TEE_AllocatePropertyEnumerator allocates a property enumerator object.",
      "parameters": [
        "enumerator: A pointer filled with an opaque handle on the property enumerator on success and with TEE_HANDLE_NULL on error."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OUT_OF_MEMORY: If there are not enough resources to allocate the property enumerator."
      ]
    },
    {
      "name": "TEE_FreePropertyEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_FreePropertyEnumerator(TEE_PropSetHandle enumerator)",
      "description": "The function TEE_FreePropertyEnumerator deallocates a property enumerator object.",
      "parameters": [
        "enumerator: A handle on the enumerator to free."
      ],
      "return": []
    },
    {
      "name": "TEE_StartPropertyEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_StartPropertyEnumerator(TEE_PropSetHandle enumerator, TEE_PropSetHandle propSet)",
      "description": "The function TEE_StartPropertyEnumerator starts to enumerate the properties in an enumerator.",
      "parameters": [
        "enumerator: A handle on the enumerator.",
        "propSet: A pseudo-handle on the property set to enumerate. SHALL be one of the TEE_PROPSET_XXX pseudo-handles."
      ],
      "return": []
    },
    {
      "name": "TEE_ResetPropertyEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_ResetPropertyEnumerator(TEE_PropSetHandle enumerator)",
      "description": "The function TEE_ResetPropertyEnumerator resets a property enumerator to its state immediately after allocation.",
      "parameters": [
        "enumerator: A handle on the enumerator to reset."
      ],
      "return": []
    },
    {
      "name": "TEE_GetPropertyName",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetPropertyName(TEE_PropSetHandle enumerator, [outstring] void* nameBuffer, size_t* nameBufferLen)",
      "description": "The function TEE_GetPropertyName gets the name of the current property in an enumerator.",
      "parameters": [
        "enumerator: A handle on the enumerator.",
        "nameBuffer, nameBufferLen: The buffer filled with the name."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If there is no current property either because the enumerator has not started or because it has reached the end of the property set.",
        "TEE_ERROR_SHORT_BUFFER: If the name buffer is not large enough to contain the property name."
      ]
    },
    {
      "name": "TEE_GetNextProperty",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetNextProperty(TEE_PropSetHandle enumerator)",
      "description": "The function TEE_GetNextProperty advances the enumerator to the next property.",
      "parameters": [
        "enumerator: A handle on the enumerator."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the enumerator has reached the end of the property set or if it has not started."
      ]
    },
    {
      "name": "TEE_Panic",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_Panic(TEE_Result panicCode)",
      "description": "The TEE_Panic function raises a Panic in the Trusted Application instance.",
      "parameters": [
        "panicCode: An informative panic code defined by the TA. May be displayed in traces if traces are available."
      ],
      "return": []
    },
    {
      "name": "TEE_OpenTASession",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_OpenTASession([in] TEE_UUID* destination, uint32_t cancellationRequestTimeout, uint32_t paramTypes, [inout] TEE_Param params[4], [out] TEE_TASessionHandle* session, [out] uint32_t* returnOrigin)",
      "description": "The function TEE_OpenTASession opens a new session with a Trusted Application.",
      "parameters": [
        "destination: A pointer to a TEE_UUID structure containing the UUID of the destination Trusted Application.",
        "cancellationRequestTimeout: Timeout in milliseconds or the special value TEE_TIMEOUT_INFINITE if there is no timeout. After the timeout expires, the TEE SHALL act as though a cancellation request for the operation had been sent.",
        "paramTypes: The types of all parameters passed in the operation.",
        "params: The parameters passed in the operation.",
        "session: A pointer to a variable that will receive the client session handle.",
        "returnOrigin: A pointer to a variable which will contain the return origin."
      ],
      "return": [
        "TEE_SUCCESS: In case of success; the session was successfully opened.",
        "TEE_ERROR_OUT_OF_MEMORY: If not enough resources are available to open the session.",
        "TEE_ERROR_ITEM_NOT_FOUND: If no Trusted Application matches the requested destination UUID.",
        "TEE_ERROR_ACCESS_DENIED: If access to the destination Trusted Application is denied.",
        "TEE_ERROR_BUSY: If the destination Trusted Application does not allow more than one session at a time and already has a session in progress.",
        "TEE_ERROR_TARGET_DEAD: If the destination Trusted Application has panicked during the operation.",
        "TEE_ERROR_CANCEL: If the request is cancelled by anything other than the destination Trusted Application."
      ]
    },
    {
      "name": "TEE_CloseTASession",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_CloseTASession(TEE_TASessionHandle session)",
      "description": "The function TEE_CloseTASession closes a client session.",
      "parameters": [
        "session: An opened session handle."
      ],
      "return": []
    },
    {
      "name": "TEE_InvokeTACommand",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_InvokeTACommand(TEE_TASessionHandle session, uint32_t cancellationRequestTimeout, uint32_t commandID, uint32_t paramTypes, [inout] TEE_Param params[4], [out] uint32_t* returnOrigin)",
      "description": "The function TEE_InvokeTACommand invokes a command within a session opened between the client Trusted Application instance and a destination Trusted Application instance.",
      "parameters": [
        "session: An opened session handle.",
        "cancellationRequestTimeout: Timeout in milliseconds or the special value TEE_TIMEOUT_INFINITE if there is no timeout.",
        "commandID: The identifier of the Command to invoke.",
        "paramTypes: The types of all parameters passed in the operation.",
        "params: The parameters passed in the operation.",
        "returnOrigin: A pointer to a variable which will contain the return origin."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OUT_OF_MEMORY: If not enough resources are available to perform the operation.",
        "TEE_ERROR_TARGET_DEAD: If the destination Trusted Application has panicked during the operation.",
        "TEE_ERROR_CANCEL: If the request is cancelled by anything other than the destination Trusted Application."
      ]
    },
    {
      "name": "TEE_GetCancellationFlag",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "bool TEE_GetCancellationFlag(void)",
      "description": "The TEE_GetCancellationFlag function determines whether the current task’s Cancellation Flag is set.",
      "parameters": [],
      "return": [
        "false if the Cancellation Flag is not set or if cancellations are masked.",
        "true if the Cancellation Flag is set and cancellations are not masked."
      ]
    },
    {
      "name": "TEE_UnmaskCancellation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "bool TEE_UnmaskCancellation(void)",
      "description": "The TEE_UnmaskCancellation function unmasks the effects of cancellation for the current task.",
      "parameters": [],
      "return": [
        "true if cancellations were masked prior to calling this function.",
        "false otherwise."
      ]
    },
    {
      "name": "TEE_MaskCancellation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "bool TEE_MaskCancellation(void)",
      "description": "The TEE_MaskCancellation function masks the effects of cancellation for the current task.",
      "parameters": [],
      "return": [
        "true if cancellations were masked prior to calling this function.",
        "false otherwise."
      ]
    },
    {
      "name": "TEE_CheckMemoryAccessRights",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CheckMemoryAccessRights(uint32_t accessFlags, [inbuf] void* buffer, size_t size)",
      "description": "The TEE_CheckMemoryAccessRights function causes the Implementation to examine a buffer of memory specified in the parameters buffer and size and to determine whether the current Trusted Application instance has the access rights requested in the parameter accessFlags.",
      "parameters": [
        "accessFlags: The access flags to check.",
        "buffer, size: The description of the buffer to check."
      ],
      "return": [
        "TEE_SUCCESS: If the entire buffer allows the requested accesses.",
        "TEE_ERROR_ACCESS_DENIED: If at least one byte in the buffer is not accessible with the requested accesses."
      ]
    },
    {
      "name": "TEE_SetInstanceData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_SetInstanceData([ctx] void* instanceData)",
      "description": "The TEE_SetInstanceData and TEE_GetInstanceData functions provide an alternative to writable global data (writable variables with global scope and writable static variables with global or function scope).",
      "parameters": [
        "instanceData: A pointer to the global Trusted Application instance data. This pointer may be NULL."
      ],
      "return": []
    },
    {
      "name": "TEE_GetInstanceData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void* TEE_GetInstanceData(void)",
      "description": "The TEE_GetInstanceData function retrieves the instance data pointer set by the Trusted Application using the TEE_SetInstanceData function.",
      "parameters": [],
      "return": [
        "The value returned is the previously set pointer to the Trusted Application instance data, or NULL if no instance data pointer has yet been set."
      ]
    },
    {
      "name": "TEE_Malloc",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void* TEE_Malloc(size_t size, uint32_t hint)",
      "description": "The TEE_Malloc function allocates space for an object whose size in bytes is specified in the parameter size.",
      "parameters": [
        "size: The size of the buffer to be allocated.",
        "hint: A hint to the allocator."
      ],
      "return": [
        "Upon successful completion, with size not equal to zero, the function returns a pointer to the allocated space."
      ]
    },
    {
      "name": "TEE_Realloc",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void* TEE_Realloc([inout] void* buffer, size_t newSize)",
      "description": "The TEE_Realloc function changes the size of the memory object pointed to by buffer to the size specified by newSize.",
      "parameters": [
        "buffer: The pointer to the object to be reallocated.",
        "newSize: The new size required for the object."
      ],
      "return": [
        "Upon successful completion, TEE_Realloc returns a pointer to the (possibly moved) allocated space."
      ]
    },
    {
      "name": "TEE_Free",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_Free(void *buffer)",
      "description": "The TEE_Free function causes the space pointed to by buffer to be deallocated; that is, made available for further allocation.",
      "parameters": [
        "buffer: The pointer to the memory block to be freed."
      ],
      "return": []
    },
    {
      "name": "TEE_MemMove",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_MemMove([outbuf(size)] void* dest, [inbuf(size)] void* src, size_t size)",
      "description": "The TEE_MemMove function copies size bytes from the buffer pointed to by src into the buffer pointed to by dest.",
      "parameters": [
        "dest: A pointer to the destination buffer.",
        "src: A pointer to the source buffer.",
        "size: The number of bytes to be copied."
      ],
      "return": []
    },
    {
      "name": "TEE_MemCompare",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "int32_t TEE_MemCompare([inbuf(size)] void* buffer1, [inbuf(size)] void* buffer2, size_t size)",
      "description": "The TEE_MemCompare function compares the first size bytes of the buffer pointed to by buffer1 to the first size bytes of the buffer pointed to by buffer2.",
      "parameters": [
        "buffer1: A pointer to the first buffer.",
        "buffer2: A pointer to the second buffer.",
        "size: The number of bytes to be compared."
      ],
      "return": [
        "The sign of a non-zero return value is determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type uint8_t) that differ in the objects being compared."
      ]
    },
    {
      "name": "TEE_MemFill",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_MemFill([outbuf(size)] void* buffer, uint8_t x, size_t size)",
      "description": "The TEE_MemFill function writes the byte x into the first size bytes of the buffer pointed to by buffer.",
      "parameters": [
        "buffer: A pointer to the destination buffer",
        "x: The value to be set",
        "size: The number of bytes to be set"
      ],
      "return": []
    },
    {
      "name": "TEE_GetObjectInfo1",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetObjectInfo1(TEE_ObjectHandle object, [out] TEE_ObjectInfo* objectInfo)",
      "description": "This function replaces the TEE_GetObjectInfo function, whose use is deprecated.",
      "parameters": [
        "object: Handle of the object.",
        "objectInfo: Pointer to a structure filled with the object information."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the persistent object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible"
      ]
    },
    {
      "name": "TEE_RestrictObjectUsage1",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_RestrictObjectUsage1(TEE_ObjectHandle object, uint32_t objectUsage)",
      "description": "This function replaces the TEE_RestrictObjectInfo function, whose use is deprecated.",
      "parameters": [
        "object: Handle on an object.",
        "objectUsage: New object usage, an OR combination of one or more of the TEE_USAGE_XXX constants defined in Table 5-4."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the persistent object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_GetObjectBufferAttribute",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetObjectBufferAttribute(TEE_ObjectHandle object, uint32_t attributeID, [outbuf] void* buffer, size_t* size)",
      "description": "The TEE_GetObjectBufferAttribute function extracts one buffer attribute from an object.",
      "parameters": [
        "object: Handle of the object.",
        "attributeID: Identifier of the attribute to retrieve.",
        "buffer, size: Output buffer to get the content of the attribute."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the attribute is not found on this object",
        "TEE_ERROR_SHORT_BUFFER: If buffer is NULL or too small to contain the key part",
        "TEE_ERROR_CORRUPT_OBJECT: If the persistent object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_GetObjectValueAttribute",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetObjectValueAttribute(TEE_ObjectHandle object, uint32_t attributeID, [outopt] uint32_t* a, [outopt] uint32_t* b)",
      "description": "The TEE_GetObjectValueAttribute function extracts a value attribute from an object.",
      "parameters": [
        "object: Handle of the object",
        "attributeID: Identifier of the attribute to retrieve",
        "a, b: Pointers on the placeholders filled with the attribute fields a and b. Each can be NULL if the corresponding field is not of interest to the caller."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the attribute is not found on this object.",
        "TEE_ERROR_ACCESS_DENIED: Deprecated: Handled by a panic.",
        "TEE_ERROR_CORRUPT_OBJECT: If the persistent object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_CloseObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_CloseObject(TEE_ObjectHandle object)",
      "description": "The TEE_CloseObject function closes an opened object handle. The object can be persistent or transient.",
      "parameters": [
        "object: Handle on the object to close. If set to TEE_HANDLE_NULL, does nothing."
      ],
      "return": []
    },
    {
      "name": "TEE_AllocateTransientObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AllocateTransientObject(uint32_t objectType, uint32_t maxObjectSize, [out] TEE_ObjectHandle* object)",
      "description": "The TEE_AllocateTransientObject function allocates an uninitialized transient object, i.e. a container for attributes.",
      "parameters": [
        "objectType: Type of uninitialized object container to be created.",
        "maxObjectSize: Key Size of the object.",
        "object: Filled with a handle on the newly created key container."
      ],
      "return": [
        "TEE_SUCCESS: On success.",
        "TEE_ERROR_OUT_OF_MEMORY: If not enough resources are available to allocate the object handle.",
        "TEE_ERROR_NOT_SUPPORTED: If the key size is not supported or the object type is not supported."
      ]
    },
    {
      "name": "TEE_FreeTransientObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_FreeTransientObject(TEE_ObjectHandle object)",
      "description": "The TEE_FreeTransientObject function deallocates a transient object previously allocated with TEE_AllocateTransientObject.",
      "parameters": [
        "object: Handle on the object to free."
      ],
      "return": []
    },
    {
      "name": "TEE_ResetTransientObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_ResetTransientObject(TEE_ObjectHandle object)",
      "description": "The TEE_ResetTransientObject function resets a transient object to its initial state after allocation.",
      "parameters": [
        "object: Handle on a transient object to reset."
      ],
      "return": []
    },
    {
      "name": "TEE_PopulateTransientObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_PopulateTransientObject(TEE_ObjectHandle object, [in] TEE_Attribute* attrs, uint32_t attrCount)",
      "description": "The TEE_PopulateTransientObject function populates an uninitialized object container with object attributes passed by the TA in the attrs parameter.",
      "parameters": [
        "object: Handle on an already created transient and uninitialized object.",
        "attrs, attrCount: Array of object attributes."
      ],
      "return": [
        "TEE_SUCCESS: In case of success. In this case, the content of the object SHALL be initialized.",
        "TEE_ERROR_BAD_PARAMETERS: If an incorrect or inconsistent attribute value is detected. In this case, the content of the object SHALL remain uninitialized."
      ]
    },
    {
      "name": "TEE_InitRefAttribute",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_InitRefAttribute([out] TEE_Attribute* attr, uint32_t attributeID, [inbuf] void* buffer, size_t length); void TEE_InitValueAttribute([out] TEE_Attribute* attr, uint32_t attributeID, uint32_t a, uint32_t b)",
      "description": "The TEE_InitRefAttribute functions can be used to populate a single attribute either with a reference to a buffer or with integer values.",
      "parameters": [
        "attr: attribute structure to initialize.",
        "attributeID: Identifier of the attribute to populate.",
        "buffer, length: Input buffer that holds the content of the attribute.",
        "a: unsigned integer value to assign to the a member of the attribute structure.",
        "b: unsigned integer value to assign to the b member of the attribute structure."
      ],
      "return": []
    },
    {
      "name": "TEE_InitValueAttribute",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_InitValueAttribute([out] TEE_Attribute* attr, uint32_t attributeID, uint32_t a, uint32_t b)",
      "description": "The TEE_InitValueAttribute functions can be used to populate a single attribute either with a reference to a buffer or with integer values.",
      "parameters": [
        "attr: attribute structure to initialize",
        "attributeID: Identifier of the attribute to populate.",
        "buffer, length: Input buffer that holds the content of the attribute.",
        "a: unsigned integer value to assign to the a member of the attribute structure.",
        "b: unsigned integer value to assign to the b member of the attribute structure."
      ],
      "return": []
    },
    {
      "name": "TEE_CopyObjectAttributes1",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CopyObjectAttributes1([out] TEE_ObjectHandle destObject, [in] TEE_ObjectHandle srcObject)",
      "description": "This function replaces the TEE_CopyObjectAttributes function, whose use is deprecated.",
      "parameters": [
        "destObject: Handle on an uninitialized transient object.",
        "srcObject: Handle on an initialized object."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the persistent object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_GenerateKey",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GenerateKey(TEE_ObjectHandle object, uint32_t keySize, [in] TEE_Attribute* params, uint32_t paramCount)",
      "description": "The TEE_GenerateKey function generates a random key or a key-pair and populates a transient key object with the generated key material.",
      "parameters": [
        "object: Handle on an uninitialized transient key to populate with the generated key.",
        "keySize: Requested key size.",
        "params, paramCount: Parameters for the key generation."
      ],
      "return": [
        "TEE_SUCCESS: On success.",
        "TEE_ERROR_BAD_PARAMETERS: If an incorrect or inconsistent attribute is detected."
      ]
    },
    {
      "name": "TEE_OpenPersistentObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_OpenPersistentObject(uint32_t storageID, [in(objectIDLength)] void* objectID, size_t objectIDLen, uint32_t flags, [out] TEE_ObjectHandle* object)",
      "description": "The TEE_OpenPersistentObject function opens a handle on an existing persistent object.",
      "parameters": [
        "storageID: The storage to use.",
        "objectID, objectIDLen: The object identifier.",
        "flags: The flags which determine the settings under which the object is opened.",
        "object: A pointer to the handle, which contains the opened handle upon successful completion."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the storage denoted by storageID does not exist or if the object identifier cannot be found in the storage.",
        "TEE_ERROR_ACCESS_CONFLICT: If an access right conflict was detected while opening the object.",
        "TEE_ERROR_OUT_OF_MEMORY: If there is not enough memory to complete the operation.",
        "TEE_ERROR_CORRUPT_OBJECT: If the storage or object is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_CreatePersistentObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CreatePersistentObject(uint32_t storageID, [in(objectIDLength)] void* objectID, size_t objectIDLen, uint32_t flags, TEE_ObjectHandle attributes, [inbuf] void* initialData, size_t initialDataLen, [out] TEE_ObjectHandle* object)",
      "description": "The TEE_CreatePersistentObject function creates a persistent object with initial attributes and an initial data stream content, and optionally returns either a handle on the created object, or TEE_HANDLE_NULL upon failure.",
      "parameters": [
        "storageID: The storage to use.",
        "objectID, objectIDLen: The object identifier.",
        "flags: The flags which determine the settings under which the object is opened.",
        "attributes: A handle on a persistent object or an initialized transient object from which to take the persistent object attributes.",
        "initialData, initialDataLen: The initial data content of the persistent object.",
        "object: A pointer to the handle, which contains the opened handle upon successful completion."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the storage denoted by storageID does not exist",
        "TEE_ERROR_ACCESS_CONFLICT: If an access right conflict was detected while opening the object",
        "TEE_ERROR_OUT_OF_MEMORY: If there is not enough memory to complete the operation",
        "TEE_ERROR_STORAGE_NO_SPACE: If insufficient space is available to create the persistent object.",
        "TEE_ERROR_CORRUPT_OBJECT: If the storage is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_CloseAndDeletePersistentObject1",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CloseAndDeletePersistentObject1(TEE_ObjectHandle object)",
      "description": "This function replaces the TEE_CloseAndDeletePersistentObject function, whose use is deprecated.",
      "parameters": [
        "object: The object handle."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_RenamePersistentObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_RenamePersistentObject(TEE_ObjectHandle object, [in(newObjectIDLen)] void* newObjectID, size_t newObjectIDLen)",
      "description": "The function TEE_RenamePersistentObject changes the identifier of an object.",
      "parameters": [
        "object: The object handle.",
        "newObjectID, newObjectIDLen: A buffer containing the new object identifier."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ACCESS_CONFLICT: If an object with the same identifier already exists.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_AllocatePersistentObjectEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AllocatePersistentObjectEnumerator([out] TEE_ObjectEnumHandle* objectEnumerator)",
      "description": "The TEE_AllocatePersistentObjectEnumerator function allocates a handle on an object enumerator.",
      "parameters": [
        "objectEnumerator: A pointer filled with the newly-allocated object enumerator handle on success."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OUT_OF_MEMORY: If there is not enough memory to allocate the enumerator handle."
      ]
    },
    {
      "name": "TEE_FreePersistentObjectEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_FreePersistentObjectEnumerator(TEE_ObjectEnumHandle objectEnumerator)",
      "description": "The TEE_FreePersistentObjectEnumerator function deallocates all resources associated with an object enumerator handle.",
      "parameters": [
        "objectEnumerator: The handle to close."
      ],
      "return": []
    },
    {
      "name": "TEE_ResetPersistentObjectEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_ResetPersistentObjectEnumerator(TEE_ObjectEnumHandle objectEnumerator)",
      "description": "The TEE_ResetPersistentObjectEnumerator function resets an object enumerator handle to its initial state after allocation.",
      "parameters": [
        "objectEnumerator: The handle to reset."
      ],
      "return": []
    },
    {
      "name": "TEE_StartPersistentObjectEnumerator",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_StartPersistentObjectEnumerator(TEE_ObjectEnumHandle objectEnumerator, uint32_t storageID)",
      "description": "The TEE_StartPersistentObjectEnumerator function starts the enumeration of all the persistent objects in a given Trusted Storage.",
      "parameters": [
        "objectEnumerator: A valid handle on an object enumerator.",
        "storageID: The identifier of the storage in which the objects SHALL be enumerated."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If the storage does not exist or if there is no object in the specified storage",
        "TEE_ERROR_CORRUPT_OBJECT: If the storage is corrupt",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_GetNextPersistentObject",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetNextPersistentObject(TEE_ObjectEnumHandle objectEnumerator, [out] TEE_ObjectInfo* objectInfo, [out] void* objectID, [out] size_t* objectIDLen)",
      "description": "",
      "parameters": [
        "The TEE_GetNextPersistentObject function gets the next object in an enumeration and returns information about the object: type, size, identifier, etc.",
        "objectEnumerator: A handle on the object enumeration.",
        "objectInfo: A pointer to a TEE_ObjectInfo filled with the object information as specified in the function TEE_GetObjectInfo1.",
        "objectID: Pointer to an array able to hold at least TEE_OBJECT_ID_MAX_LEN bytes.",
        "objectIDLen: Filled with the size of the object identifier."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_ITEM_NOT_FOUND: If there are no more elements in the object enumeration or if no enumeration is started on this handle.",
        "TEE_ERROR_CORRUPT_OBJECT: If the storage or returned object is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_ReadObjectData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_ReadObjectData(TEE_ObjectHandle object, [out] void* buffer, size_t size, [out] uint32_t* count)",
      "description": "The TEE_ReadObjectData function attempts to read size bytes from the data stream associated with the object object into the buffer pointed to by buffer.",
      "parameters": [
        "object: The object handle.",
        "buffer: A pointer to the memory which, upon successful completion, contains the bytes read.",
        "size: The number of bytes to read.",
        "count: A pointer to the variable which upon successful completion contains the number of bytes read."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_WriteObjectData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_WriteObjectData(TEE_ObjectHandle object, [in] void* buffer, size_t size)",
      "description": "The TEE_WriteObjectData function writes size bytes from the buffer pointed to by buffer to the data stream associated with the open object handle object.",
      "parameters": [
        "object: The object handle.",
        "buffer: The buffer containing the data to be written.",
        "size: The number of bytes to writ."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_STORAGE_NO_SPACE: If insufficient storage space is available",
        "TEE_ERROR_OVERFLOW: If the value of the data position indicator resulting from this operation would be greater than TEE_DATA_MAX_POSITION.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_TruncateObjectData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_TruncateObjectData(TEE_ObjectHandle object, uint32_t size)",
      "description": "The function TEE_TruncateObjectData changes the size of a data stream.",
      "parameters": [
        "object: The object handle.",
        "size: The new size of the data stream."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_STORAGE_NO_SPACE: If insufficient storage space is available to perform the operation.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_SeekObjectData",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_SeekObjectData(TEE_ObjectHandle object, int32_t offset, TEE_Whence whence)",
      "description": "The TEE_SeekObjectData function sets the data position indicator associated with the object handle.",
      "parameters": [
        "object: The object handle.",
        "offset: The number of bytes to move the data position.",
        "whence: The position in the data stream from which to calculate the new position."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: If the value of the data position indicator resulting from this operation would be greater than TEE_DATA_MAX_POSITION.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_AllocateOperation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AllocateOperation(TEE_OperationHandle* operation, uint32_t algorithm, uint32_t mode, uint32_t maxKeySize)",
      "description": "The TEE_AllocateOperation function allocates a handle for a new cryptographic operation and sets the mode and algorithm type.",
      "parameters": [
        "operation: Reference to generated operation handle.",
        "algorithm: One of the cipher algorithms.",
        "mode: The operation mode.",
        "maxKeySize: Maximum key size in bits for the operation."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OUT_OF_MEMORY: If there are not enough resources to allocate the operation.",
        "TEE_ERROR_NOT_SUPPORTED: If the mode is not compatible with the algorithm or key size or if the algorithm is not one of the listed algorithms or if maxKeySize is not appropriate for the algorithm."
      ]
    },
    {
      "name": "TEE_FreeOperation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_FreeOperation(TEE_OperationHandle operation)",
      "description": "The TEE_FreeOperation function deallocates all resources associated with an operation handle.",
      "parameters": [
        "operation: Reference to operation handle."
      ],
      "return": []
    },
    {
      "name": "TEE_GetOperationInfo",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_GetOperationInfo(TEE_OperationHandle operation, [out] TEE_OperationInfo* operationInfo)",
      "description": "The TEE_GetOperationInfo function returns information about an operation handle.",
      "parameters": [
        "operation: Handle on the operation.",
        "operationInfo: Pointer to a structure filled with the operation information."
      ],
      "return": []
    },
    {
      "name": "TEE_GetOperationInfoMultiple",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetOperationInfoMultiple(TEE_OperationHandle operation, [outbuf] TEE_OperationInfoMultiple* operationInfoMultiple, size_t* operationSize)",
      "description": "The TEE_GetOperationInfoMultiple function returns information about an operation handle.",
      "parameters": [
        "operation: Handle on the operation.",
        "operationInfoMultiple, operationSize: Buffer filled with the operation information."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the operationInfo buffer is not large enough to hold a TEE_OperationInfoMultiple structure containing the number of keys required by a TEE_Operation of the type supplied."
      ]
    },
    {
      "name": "TEE_ResetOperation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_ResetOperation(TEE_OperationHandle operation)",
      "description": "For a multi-stage operation, the TEE_ResetOperation function resets the TEE_OperationHandle to the state after the initial TEE_AllocateOperation call with the addition of any keys which were configured subsequent to this so that the TEE_OperationHandle can be reused with the same keys.",
      "parameters": [
        "operation: Handle on the operation."
      ],
      "return": []
    },
    {
      "name": "TEE_SetOperationKey",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_SetOperationKey(TEE_OperationHandle operation, [in] TEE_ObjectHandle key)",
      "description": "The TEE_SetOperationKey function programs the key of an operation; that is, it associates an operation with a key.",
      "parameters": [
        "operation: Operation handle.",
        "key: A handle on a key object."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the persistent object is stored in a storage area which is currently inaccessible."
      ]
    },
    {
      "name": "TEE_SetOperationKey2",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_SetOperationKey2(TEE_OperationHandle operation, [in] TEE_ObjectHandle key1, [in] TEE_ObjectHandle key2)",
      "description": "The TEE_SetOperationKey2 function initializes an existing operation with two keys.",
      "parameters": [
        "operation: Operation handle.",
        "key1: A handle on a key object.",
        "key2: A handle on a key object."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_CORRUPT_OBJECT: If the key1 object is corrupt. The object handle is closed.",
        "TEE_ERROR_CORRUPT_OBJECT_2: If the key2 object is corrupt. The object handle is closed.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE: If the key1 object is stored in a storage area which is currently inaccessible.",
        "TEE_ERROR_STORAGE_NOT_AVAILABLE_2: If the key2 object is stored in a storage area which is currently inaccessible.",
        "TEE_ERROR_SECURITY: If the key1 object and the key2 object are the same."
      ]
    },
    {
      "name": "TEE_CopyOperation",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_CopyOperation([out] TEE_OperationHandle dstOperation, [in] TEE_OperationHandle srcOperation)",
      "description": "The TEE_CopyOperation function copies an operation state from one operation handle into another operation handle.",
      "parameters": [
        "dstOperation: Handle on the destination operation.",
        "srcOperation: Handle on the source operation."
      ],
      "return": []
    },
    {
      "name": "TEE_IsAlgorithmSupported",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_IsAlgorithmSupported([in] uint32_t algId [in] uint32_t element)",
      "description": "The TEE_IsAlgorithmSupported function can be used to determine whether a combination of algId and element is supported.",
      "parameters": [
        "algId: An algorithm identifier.",
        "element: A cryptographic element."
      ],
      "return": [
        "TEE_SUCCESS: The requested combination of algId and element is supported.",
        "TEE_ERROR_NOT_SUPPORTED: The requested combination of algId and element is not supported."
      ]
    },
    {
      "name": "TEE_DigestUpdate",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_DigestUpdate(TEE_OperationHandle operation, [inbuf] void* chunk, size_t chunkSize)",
      "description": "The TEE_DigestUpdate function accumulates message data for hashing.",
      "parameters": [
        "operation: Handle of a running Message Digest operation.",
        "chunk, chunkSize: Chunk of data to be hashed."
      ],
      "return": []
    },
    {
      "name": "TEE_DigestDoFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_DigestDoFinal(TEE_OperationHandle operation, [inbuf] void* chunk, size_t chunkLen, [outbuf] void* hash, size_t *hashLen)",
      "description": "The TEE_DigestDoFinal function finalizes the message digest operation and produces the message hash.",
      "parameters": [
        "operation: Handle of a running Message Digest operation",
        "chunk, chunkLen: Last chunk of data to be hashed",
        "hash, hashLen: Output buffer filled with the message hash"
      ],
      "return": [
        "TEE_SUCCESS: On success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is too small. In this case, the operation is not finalized."
      ]
    },
    {
      "name": "TEE_CipherInit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_CipherInit(TEE_OperationHandle operation, [inbuf] void* IV, size_t IVLen)",
      "description": "The TEE_CipherInit function starts the symmetric cipher operation.",
      "parameters": [
        "operation: A handle on an opened cipher operation setup with a key",
        "IV, IVLen: Buffer containing the operation Initialization Vector as appropriate."
      ],
      "return": []
    },
    {
      "name": "TEE_CipherUpdate",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CipherUpdate(TEE_OperationHandle operation, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen)",
      "description": "The TEE_CipherUpdate function encrypts or decrypts input data.",
      "parameters": [
        "operation: Handle of a running Cipher operation.",
        "srcData, srcLen: Input data buffer to be encrypted or decrypted.",
        "destData, destLen: Output buffer."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to contain the output."
      ]
    },
    {
      "name": "TEE_CipherDoFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_CipherDoFinal(TEE_OperationHandle operation, [inbuf] void* srcData, size_t srcLen, [outbufopt] void* destData, size_t *destLen)",
      "description": "The TEE_CipherDoFinal function finalizes the cipher operation, processing data that has notbeen processed by previous calls to TEE_CipherUpdate as well as data supplied in srcData.",
      "parameters": [
        "operation: Handle of a running Cipher operation.",
        "srcData, srcLen: Reference to final chunk of input data to be encrypted or decrypted.",
        "destData, destLen: Output buffer."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to contain the output."
      ]
    },
    {
      "name": "TEE_MACInit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_MACInit(TEE_OperationHandle operation, [inbuf] void* IV, size_t IVLen)",
      "description": "The TEE_MACInit function initializes a MAC operation.",
      "parameters": [
        "operation: Operation handle",
        "IV, IVLen: Input buffer containing the operation Initialization Vector, if applicable."
      ],
      "return": []
    },
    {
      "name": "TEE_MACUpdate",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_MACUpdate(TEE_OperationHandle operation, [inbuf] void* chunk, size_t chunkSize)",
      "description": "The TEE_MACUpdate function accumulates data for a MAC calculation.",
      "parameters": [
        "operation: Handle of a running MAC operation.",
        "chunk, chunkSize: Chunk of the message to be MACed."
      ],
      "return": []
    },
    {
      "name": "TEE_MACComputeFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_MACComputeFinal(TEE_OperationHandle operation, [inbuf] void* message, size_t messageLen, [outbuf] void* mac, size_t *macLen)",
      "description": "The TEE_MACComputeFinal function finalizes the MAC operation with a last chunk of message, and computes the MAC.",
      "parameters": [
        "operation: Handle of a MAC operation.",
        "message, messageLen: Input buffer containing a last message chunk to MAC.",
        "mac, macLen: Output buffer filled with the computed MAC."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to contain the computed MAC."
      ]
    },
    {
      "name": "TEE_MACCompareFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_MACCompareFinal(TEE_OperationHandle operation, [inbuf] void* message, size_t messageLen, [inbuf] void* mac, size_t macLen)",
      "description": "The TEE_MACCompareFinal function finalizes the MAC operation and compares the MAC with the buffer passed to the function.",
      "parameters": [
        "operation: Handle of a MAC operation.",
        "message, messageLen: Input buffer containing the last message chunk to MAC.",
        "mac, macLen: Input buffer containing the MAC to check."
      ],
      "return": [
        "TEE_SUCCESS: If the computed MAC corresponds to the MAC passed in the parameter mac.",
        "TEE_ERROR_MAC_INVALID: If the computed MAC does not correspond to the value passed in the parameter mac."
      ]
    },
    {
      "name": "TEE_AEInit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AEInit(TEE_OperationHandle operation, [inbuf] void* nonce, size_t nonceLen, uint32_t tagLen, uint32_t AADLen, uint32_t payloadLen)",
      "description": "The TEE_AEInit function initializes an Authentication Encryption operation.",
      "parameters": [
        "operation: A handle on the operation.",
        "nonce, nonceLen: The operation nonce or IV.",
        "tagLen: Size in bits of the tag.",
        "AADLen: Length in bytes of the AAD.",
        "payloadLen: Length in bytes of the payload."
      ],
      "return": [
        "TEE_SUCCESS: On success.",
        "TEE_ERROR_NOT_SUPPORTED: If the tag length is not supported by the algorithm."
      ]
    },
    {
      "name": "TEE_AEUpdateAAD",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_AEUpdateAAD(TEE_OperationHandle operation, [inbuf] void* AADdata, size_t AADdataLen)",
      "description": "The TEE_AEUpdateAAD function feeds a new chunk of Additional Authentication Data (AAD) to the AE operation.",
      "parameters": [
        "operation: Handle on the AE operation.",
        "AADdata, AADdataLen: Input buffer containing the chunk of AAD."
      ],
      "return": []
    },
    {
      "name": "TEE_AEUpdate",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AEUpdate(TEE_OperationHandle operation, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen)",
      "description": "The TEE_AEUpdate function accumulates data for an Authentication Encryption operation.",
      "parameters": [
        "operation: Handle of a running AE operation.",
        "srcData, srcLen: Input data buffer to be encrypted or decrypted.",
        "destData, destLen: Output buffer."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to contain the output."
      ]
    },
    {
      "name": "TEE_AEEncryptFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AEEncryptFinal(TEE_OperationHandle operation, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t* destLen, [outbuf] void* tag, size_t* tagLen)",
      "description": "The TEE_AEEncryptFinal function processes data that has not been processed by previous calls to TEE_AEUpdate as well as data supplied in srcData.",
      "parameters": [
        "operation: Handle of a running AE operation",
        "srcData, srcLen: Reference to final chunk of input data to be encrypted.",
        "destData, destLen: Output buffer.",
        "tag, tagLen: Output buffer filled with the compute."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output or tag buffer is not large enough to contain the output."
      ]
    },
    {
      "name": "TEE_AEDecryptFinal",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AEDecryptFinal(TEE_OperationHandle operation, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen, [in] void* tag, size_t tagLen)",
      "description": "The TEE_AEDecryptFinal function processes data that has not been processed by previous calls to TEE_AEUpdate as well as data supplied in srcData.",
      "parameters": [
        "operation: Handle of a running AE operation.",
        "srcData, srcLen: Reference to final chunk of input data to be decrypted.",
        "destData, destLen: Output buffer.",
        "tag, tagLen: Input buffer containing the tag to compare."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to contain the output.",
        "TEE_ERROR_MAC_INVALID: If the computed tag does not match the supplied tag."
      ]
    },
    {
      "name": "TEE_AsymmetricEncrypt",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AsymmetricEncrypt(TEE_OperationHandle operation, [in] TEE_Attribute* params, uint32_t paramCount, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen); TEE_Result TEE_AsymmetricDecrypt(TEE_OperationHandle operation, [in] TEE_Attribute* params, uint32_t paramCount, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen)",
      "description": "The TEE_AsymmetricEncrypt function encrypts a message within an asymmetric operation.",
      "parameters": [
        "operation: Handle on the operation, which SHALL have been suitably set up with an operation key.",
        "params, paramCount: Optional operation parameters.",
        "srcData, srcLen: Input buffer.",
        "destData, destLen: Output buffer."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to hold the result.",
        "TEE_ERROR_BAD_PARAMETERS: If the length of the input buffer is not consistent with the algorithm or key size.",
        "TEE_ERROR_CIPHERTEXT_INVALID: If there is an error in the packing used on the ciphertext."
      ]
    },
    {
      "name": "TEE_AsymmetricDecrypt",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AsymmetricDecrypt(TEE_OperationHandle operation, [in] TEE_Attribute* params, uint32_t paramCount, [inbuf] void* srcData, size_t srcLen, [outbuf] void* destData, size_t *destLen)",
      "description": "The TEE_AsymmetricDecrypt function decrypts the result.",
      "parameters": [
        "operation: Handle on the operation, which SHALL have been suitably set up with an operation key.",
        "params, paramCount: Optional operation parameters.",
        "srcData, srcLen: Input buffer.",
        "destData, destLen: Output buffer."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is not large enough to hold the result.",
        "TEE_ERROR_BAD_PARAMETERS: If the length of the input buffer is not consistent with the algorithm or key size.",
        "TEE_ERROR_CIPHERTEXT_INVALID: If there is an error in the packing used on the ciphertext."
      ]
    },
    {
      "name": "TEE_AsymmetricSignDigest",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AsymmetricSignDigest(TEE_OperationHandle operation, [in] TEE_Attribute* params, uint32_t paramCount, [inbuf] void* digest, size_t digestLen, [outbuf] void* signature, size_t *signatureLen)",
      "description": "The TEE_AsymmetricSignDigest function signs a message digest within an asymmetric operation.",
      "parameters": [
        "operation: Handle on the operation, which SHALL have been suitably set up with an operation key.",
        "params, paramCount: Optional operation parameters.",
        "digest, digestLen: Input buffer containing the input message digest.",
        "signature, signatureLen: Output buffer written with the signature of the digest."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the signature buffer is not large enough to hold the result."
      ]
    },
    {
      "name": "TEE_AsymmetricVerifyDigest",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_AsymmetricVerifyDigest(TEE_OperationHandle operation, [in] TEE_Attribute* params, uint32_t paramCount, [inbuf] void* digest, size_t digestLen, [inbuf] void* signature, size_t signatureLen)",
      "description": "The TEE_AsymmetricVerifyDigest function verifies a message digest signature within an asymmetric operation.",
      "parameters": [
        "operation: Handle on the operation, which SHALL have been suitably set up with an operation key.",
        "params, paramCount: Optional operation parameters.",
        "digest, digestLen: Input buffer containing the input message digest.",
        "signature, signatureLen: Input buffer containing the signature to verify."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SIGNATURE_INVALID: If the signature is invalid"
      ]
    },
    {
      "name": "TEE_DeriveKey",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_DeriveKey(TEE_OperationHandle operation, [inout] TEE_Attribute* params, uint32_t paramCount, TEE_ObjectHandle derivedKey)",
      "description": "The TEE_DeriveKey function takes one of the Asymmetric Derivation Operation Parameters as input, and outputs a key object.",
      "parameters": [
        "operation: Handle on the operation, which SHALL have been suitably set up with an operation key.",
        "params, paramCount: Operation parameters.",
        "derivedKey: Handle on an uninitialized transient object to be filled with the derived key."
      ],
      "return": []
    },
    {
      "name": "TEE_GenerateRandom",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_GenerateRandom([out] void* randomBuffer, size_t randomBufferLen)",
      "description": "The TEE_GenerateRandom function generates random data.",
      "parameters": [
        "randomBuffer: Reference to generated random data",
        "randomBufferLen: Byte length of requested random data."
      ],
      "return": []
    },
    {
      "name": "TEE_GetSystemTime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_GetSystemTime([out] TEE_Time* time)",
      "description": "The TEE_GetSystemTime function retrieves the current system time.",
      "parameters": [
        "time: Filled with the number of seconds and milliseconds since midnight on January 1, 1970, UTC."
      ],
      "return": []
    },
    {
      "name": "TEE_Wait",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_Wait(uint32_t timeout)",
      "description": "The TEE_Wait function waits for the specified number of milliseconds or waits forever if timeout equals TEE_TIMEOUT_INFINITE (0xFFFFFFFF).",
      "parameters": [
        "timeout: The number of milliseconds to wait, or TEE_TIMEOUT_INFIN."
      ],
      "return": [
        "TEE_SUCCESS: On success.",
        "TEE_ERROR_CANCEL: If the wait has been cancelled."
      ]
    },
    {
      "name": "TEE_GetTAPersistentTime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_GetTAPersistentTime([out] TEE_Time* time)",
      "description": "The TEE_GetTAPersistentTime function retrieves the persistent time of the Trusted Application, expressed as a number of seconds and milliseconds since the arbitrary origin set by calling TEE_SetTAPersistent.",
      "parameters": [
        "time: A pointer to the TEE_Time structure to be set to the current TA Persistent Time. If an error other than TEE_ERROR_OVERFLOW is returned, this structure is filled with zeroes."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_TIME_NOT_SET",
        "TEE_ERROR_TIME_NEEDS_RESET",
        "TEE_ERROR_OVERFLOW: The number of seconds in the TA Persistent Time overflows the range of a uint32_t.",
        "TEE_ERROR_OUT_OF_MEMORY: If not enough memory is available to complete the operation."
      ]
    },
    {
      "name": "TEE_SetTAPersistentTime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_SetTAPersistentTime([in] TEE_Time* time)",
      "description": "The TEE_SetTAPersistentTime function sets the persistent time of the current Trusted Application.",
      "parameters": [
        "time: Filled with the persistent time of the current TA."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OUT_OF_MEMORY: If not enough memory is available to complete the operation.",
        "TEE_ERROR_STORAGE_NO_SPACE: If insufficient storage space is available to complete the operation."
      ]
    },
    {
      "name": "TEE_GetREETime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_GetREETime([out] TEE_Time* time)",
      "description": "The TEE_GetREETime function retrieves the current REE system time.",
      "parameters": [
        "time: Filled with the number of seconds and milliseconds since midnight on January 1, 1970, UTC."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntInit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntInit([out] TEE_BigInt *bigInt, size_t len)",
      "description": "The TEE_BigIntInit function initializes bigInt and sets its represented value to zero.",
      "parameters": [
        "bigInt: A pointer to the TEE_BigInt to be initialized.",
        "len: The size in uint32_t of the memory pointed to by bigInt."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntInitFMMContext1",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntInitFMMContext1([out] TEE_BigIntFMMContext *context, size_t len, [in] TEE_BigInt *modulus)",
      "description": "This function replaces the TEE_BigIntInitFMMContext function, whose use is deprecated.",
      "parameters": [
        "context: A pointer to the TEE_BigIntFMMContext to be initialized.",
        "len: The size in uint32_t of the memory pointed to by context.",
        "modulus: The modulus, an odd integer larger than 2 and less than 2 to the power of gpd.tee.arith.maxBigIntSize."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_NOT_SUPPORTED: The underlying implementation is unable to perform the operation on a particular modulus value."
      ]
    },
    {
      "name": "TEE_BigIntInitFMM",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntInitFMM([in] TEE_BigIntFMM *bigIntFMM, size_t len)",
      "description": "The TEE_BigIntInitFMM function initializes bigIntFMM and sets its represented value to zero.",
      "parameters": [
        "bigIntFMM: A pointer to the TEE_BigIntFMM to be initialized.",
        "len: The size in uint32_t of the memory pointed to by bigIntFMM."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntConvertFromOctetString",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntConvertFromOctetString([out] TEE_BigInt *dest, [inbuf] uint8_t *buffer, size_t bufferLen, int32_t sign)",
      "description": "The TEE_BigIntConvertFromOctetString function converts a bufferLen byte octet string buffer into a TEE_BigInt format. The octet string is in most significant byte first representation.",
      "parameters": [
        "dest: Pointer to a TEE_BigInt to hold the result.",
        "buffer: Pointer to the buffer containing the octet string representation of the integer.",
        "bufferLen: The length of *buffer in bytes.",
        "sign: The sign of dest is set to the sign of sign."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: If memory allocation for the dest is too small."
      ]
    },
    {
      "name": "TEE_BigIntConvertToOctetString",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntConvertToOctetString([outbuf] void* buffer, size_t *bufferLen, [in] TEE_BigInt *bigInt)",
      "description": "The TEE_BigIntConvertToOctetString function converts the absolute value of an integer in TEE_BigInt format into an octet string.",
      "parameters": [
        "buffer, bufferLen: Output buffer where converted octet string representation of the integer is written.",
        "bigInt: Pointer to the integer that will be converted to an octet string."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_SHORT_BUFFER: If the output buffer is too small to contain the octet string."
      ]
    },
    {
      "name": "TEE_BigIntConvertFromS32",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntConvertFromS32([out] TEE_BigInt *dest, int32_t shortVal)",
      "description": "The TEE_BigIntConvertFromS32 function sets *dest to the value shortVal.",
      "parameters": [
        "dest: Pointer to the start of an array reference by TEE_BigInt * into which the result is stored.",
        "shortVal: Input value."
      ],
      "return": [
        "The result SHALL point to a memory allocation which is at least large enough for holding a 32-bit signed value in a TEE_BigInt structure."
      ]
    },
    {
      "name": "TEE_BigIntConvertToS32",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntConvertToS32([out] int32_t *dest, [in] TEE_BigInt *src)",
      "description": "The TEE_BigIntConvertToS32 function sets *dest to the value of src, including the sign of src.",
      "parameters": [
        "dest: Pointer to an int32_t to store the result.",
        "src: Pointer to the input value."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: If src does not fit within an int32_t."
      ]
    },
    {
      "name": "TEE_BigIntCmp",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "int32_t TEE_BigIntCmp([in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntCmp function checks whether op1 > op2, op1 == op2, or op1 < op2.",
      "parameters": [
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand."
      ],
      "return": [
        "This function returns a negative number if op1 < op2, 0 if op1 == op2, and a positive number if op1 > op"
      ]
    },
    {
      "name": "TEE_BigIntCmpS32",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "int32_t TEE_BigIntCmpS32([in] TEE_BigInt *op, int32_t shortVal)",
      "description": "The TEE_BigIntCmpS32 function checks whether op > shortVal, op == shortVal, or op < shortVal.",
      "parameters": [
        "op: Pointer to the first operand.",
        "shortVal: Pointer to the second operand."
      ],
      "return": [
        "This function returns a negative number if op < shortVal, 0 if op == shortVal, and a positive number if op > shortVal"
      ]
    },
    {
      "name": "TEE_BigIntShiftRight",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntShiftRight([out] TEE_BigInt *dest, [in] TEE_BigInt *op size_t bits)",
      "description": "The TEE_BigIntShiftRight function computes |dest| = |op| >> bits and dest will have the same sign as op**5.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the shifted result.",
        "op: Pointer to the operand to be shifted.",
        "bits: Number of bits to shift."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntGetBit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "bool TEE_BigIntGetBit([in] TEE_BigInt *src, uint32_t bitIndex)",
      "description": "The TEE_BigIntGetBit function returns the bitIndexth bit of the natural binary representation of |src|.",
      "parameters": [
        "src: Pointer to the integer.",
        "bitIndex: The offset of the bit to be read, starting at offset 0 for the least significant bit."
      ],
      "return": [
        "The Boolean value of the bitIndexth bit in |src|. True represents a “1” and false represents a “0”."
      ]
    },
    {
      "name": "TEE_BigIntGetBitCount",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "int32_t TEE_BigIntGetBitCount([in] TEE_BigInt *src)",
      "description": "The TEE_BigIntGetBitCount function returns the number of bits in the natural binary representation of |src|; that is, the magnitude of src.",
      "parameters": [
        "src: Pointer to the integer."
      ],
      "return": [
        "The number of bits in the natural binary representation of |src|. If src equals zero, it will return 0."
      ]
    },
    {
      "name": "TEE_BigIntSetBit",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntSetBit([inout] TEE_BigInt *op, uint32_t bitIndex, bool value)",
      "description": "The TEE_BigIntSetBit function sets the bitIndexth bit of the natural binary representation of |op| to 1 or 0, depending on the parameter value.",
      "parameters": [
        "op: Pointer to the integer",
        "bitIndex: The offset of the bit to be set, starting at offset 0 for the least significant bit.",
        "value: The bit value to set where true represents a “1” and false represents a “0”."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: If the bitIndexth bit is larger than allocated bit length of op."
      ]
    },
    {
      "name": "TEE_BigIntAssign",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntAssign([out] TEE_BigInt *dest, [in] TEE_BigInt *src)",
      "description": "The TEE_BigIntAssign function assigns the value of src to dest.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to be assigned.",
        "src: Pointer to the source operand."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: In case the dest operand cannot hold the value of src."
      ]
    },
    {
      "name": "TEE_BigIntAbs",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "TEE_Result TEE_BigIntAbs([out] TEE_BigInt *dest, [in] TEE_BigInt *src)",
      "description": "The TEE_BigIntAbs function assigns the value of |src| to dest.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to be assigned.",
        "src: Pointer to the source operand."
      ],
      "return": [
        "TEE_SUCCESS: In case of success.",
        "TEE_ERROR_OVERFLOW: In case the dest operand cannot hold the value of |src|."
      ]
    },
    {
      "name": "TEE_BigIntAdd",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntAdd([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntAdd function computes dest = op1 + op2.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to store the result op1 + op2",
        "op1: Pointer to the first operand",
        "op2: Pointer to the second operand"
      ],
      "return": [
        "Depending on the sign of op1 and op2, the result may be larger or smaller than op1 and op2."
      ]
    },
    {
      "name": "TEE_BigIntSub",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntSub([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntSub function computes dest = op1 – op2. All or some of dest, op1, and op2 MAY point to the same memory region but SHALL point to the start address of a TEE_BigIn.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to store the result op1 – op2.",
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand."
      ],
      "return": [
        "Depending on the sign of op1 and op2, the result may be larger or smaller than op1 and op2."
      ]
    },
    {
      "name": "TEE_BigIntNeg",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntNeg([out] TEE_BigInt *dest, [in] TEE_BigInt *op)",
      "description": "",
      "parameters": [
        "The TEE_BigIntNeg function negates an operand: dest = -op.",
        "dest: Pointer to TEE_BigInt to store the result -op.",
        "op: Pointer to the operand to be negated."
      ],
      "return": [
        "The result SHALL have memory allocation for magnitude(op) bits."
      ]
    },
    {
      "name": "TEE_BigIntMul",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntMul([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntMul function computes dest = op1 * op2.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to store the result op1 * op2.",
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand."
      ],
      "return": [
        "The result SHALL have memory allocation for (magnitude(op1) + magnitude(op2)) bits."
      ]
    },
    {
      "name": "TEE_BigIntSquare",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntSquare([out] TEE_BigInt *dest, [in] TEE_BigInt *op)",
      "description": "The TEE_BigIntSquare function computes dest = op * op.",
      "parameters": [
        "dest: Pointer to TEE_BigInt to store the result op * op",
        "op: Pointer to the operand to be square."
      ],
      "return": [
        "The result SHALL have memory allocation for 2*magnitude(op) bits."
      ]
    },
    {
      "name": "TEE_BigIntDiv",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntDiv([out] TEE_BigInt *dest_q, [out] TEE_BigInt *dest_r, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntDiv function computes dest_r and dest_q such that op1 = dest_q * op2 + dest_r.",
      "parameters": [
        "dest_q: Pointer to a TEE_BigInt to store the quotient. dest_q can be NULL.",
        "dest_r: Pointer to a TEE_BigInt to store the remainder. dest_r can be NULL.",
        "op1: Pointer to the first operand, the dividend.",
        "op2: Pointer to the second operand, the divisor."
      ],
      "return": [
        "The quotient, dest_q, SHALL have memory allocation sufficient to hold a TEE_BigInt with magnitude."
      ]
    },
    {
      "name": "TEE_BigIntMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op, [in] TEE_BigInt *n)",
      "description": "interval [0, n-1].",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result op (mod n). The result dest will be in the",
        "op: Pointer to the operand to be reduced mod n.",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntAddMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntAddMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2, [in] TEE_BigInt *n)",
      "description": "The TEE_BigIntAddMod function computes dest = (op1 + op2) (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op1 + op2) (mod n).",
        "op1: Pointer to the first operand. Operand SHALL be in the interval [0,n-1].",
        "op2: Pointer to the second operand. Operand SHALL be in the interval [0,n-1].",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntSubMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntSubMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2, [in] TEE_BigInt *n)",
      "description": "The TEE_BigIntSubMod function computes dest = (op1 - op2) (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op1 - op2) (mod n).",
        "op1: Pointer to the first operand. Operand SHALL be in the interval [0,n-1].",
        "op2: Pointer to the second operand. Operand SHALL be in the interval [0,n-1].",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntMulMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntMulMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2, [in] TEE_BigInt *n)",
      "description": "The TEE_BigIntMulMod function computes dest = (op1 * op2) (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op1 * op2) (mod n).",
        "op1: Pointer to the first operand. Operand SHALL be in the interval [0,n-1].",
        "op2: Pointer to the second operand. Operand SHALL be in the interval [0,n-1].",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntSquareMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntSquareMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op, [in] TEE_BigInt *n)",
      "description": "The TEE_BigIntSquareMod function computes dest = (op * op) (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op * op) (mod n).",
        "op: Pointer to the operand. Operand SHALL be in the interval [0,n-1].",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntInvMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntInvMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op, [in] TEE_BigInt *n)",
      "description": "The TEE_BigIntInvMod function computes dest such that dest * op = 1 (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op^-1) (mod n).",
        "op: Pointer to the operand. Operand SHALL be in the interval [1,n-1].",
        "n: Pointer to the modulus. Modulus SHALL be larger than 1."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntExpMod",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntExpMod([out] TEE_BigInt *dest, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2, [in] TEE_BigInt *n, [in] TEE_BigIntFMMContext *context)",
      "description": "The TEE_BigIntExpMod function computes dest = (op1 ^ op2) (mod n).",
      "parameters": [
        "dest: Pointer to TEE_BigInt to hold the result (op1 ^ op2) (mod n)",
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand.",
        "n: Pointer to the modulus.",
        "context: Pointer to a context previously initialized using TEE_BigIntInitFMMContext1, or NULL."
      ],
      "return": [
        "The result dest SHALL have memory allocation for magnitude(n) bits."
      ]
    },
    {
      "name": "TEE_BigIntRelativePrime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "bool TEE_BigIntRelativePrime([in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntRelativePrime function determines whether gcd(op1, op2) == 1.",
      "parameters": [
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand."
      ],
      "return": [
        "true if gcd(op1, op2) == 1.",
        "false otherwise."
      ]
    },
    {
      "name": "TEE_BigIntComputeExtendedGcd",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntComputeExtendedGcd([out] TEE_BigInt *gcd, [out] TEE_BigInt *u, [out] TEE_BigInt *v, [in] TEE_BigInt *op1, [in] TEE_BigInt *op2)",
      "description": "The TEE_BigIntComputeExtendedGcd function computes the greatest common divisor of the input parameters op1 and op2. op1 and op2 SHALL NOT both be zero.",
      "parameters": [
        "gcd: Pointer to TEE_BigInt to hold the greatest common divisor of op1 and op2.",
        "u: Pointer to TEE_BigInt to hold the first coefficient.",
        "v: Pointer to TEE_BigInt to hold the second coefficient.",
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand."
      ],
      "return": [
        "The gcd result SHALL be able to hold max(magnitude(op1), magnitude(op2)) bits.",
        "If op1 != 0 and op2 != 0, then |u| < |op2/gcd| and |v| < |op1/gcd|.",
        "If op1 != 0 and op2 = 0, then v = 0.",
        "If op2 != 0 and op1 = 0, then u = 0."
      ]
    },
    {
      "name": "TEE_BigIntIsProbablePrime",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "int32_t TEE_BigIntIsProbablePrime([in] TEE_BigInt *op, uint32_t confidenceLevel)",
      "description": "The TEE_BigIntIsProbablePrime function performs a probabilistic primality test on op.",
      "parameters": [
        "op: Candidate number that is tested for primality",
        "confidenceLevel: The desired confidence level for a non-conclusive test.",
        "0: If op is a composite number.",
        "1: If op is guaranteed to be prime.",
        "-1: If the test is non-conclusive but the probability that op is composite is less than 2^(-confidenceLe)."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntConvertToFMM",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntConvertToFMM([out] TEE_BigIntFMM *dest, [in] TEE_BigInt *src, [in] TEE_BigInt *n, [in] TEE_BigIntFMMContext *context)",
      "description": "The TEE_BigIntConvertToFMM function converts src into a representation suitable for doing fast modular multiplication.",
      "parameters": [
        "dest: Pointer to an initialized TEE_BigIntFMM memory area",
        "src: Pointer to the TEE_BigInt to convert",
        "n: Pointer to the modulus",
        "context: Pointer to a context previously initialized using TEE_BigIntInitFMMContext"
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntConvertFromFMM",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntConvertFromFMM([out] TEE_BigInt *dest, [in] TEE_BigIntFMM *src, [in] TEE_BigInt *n, [in] TEE_BigIntFMMContext *context)",
      "description": "The TEE_BigIntConvertFromFMM function converts src in the fast modular multiplication representation back to a TEE_BigInt representation.",
      "parameters": [
        "dest: Pointer to an initialized TEE_BigInt memory area to hold the converted result.",
        "src: Pointer to a TEE_BigIntFMM holding the value in the fast modular multiplication representation.",
        "n: Pointer to the modulus.",
        "context: Pointer to a context previously initialized using TEE_BigIntInitFMMContext1."
      ],
      "return": []
    },
    {
      "name": "TEE_BigIntComputeFMM",
      "headerfile_desc": "tee_internal_api.h",
      "func_name": "void TEE_BigIntComputeFMM([out] TEE_BigIntFMM *dest, [in] TEE_BigIntFMM *op1, [in] TEE_BigIntFMM *op2, [in] TEE_BigInt *n, [in] TEE_BigIntFMMContext *context)",
      "description": "The TEE_BigIntComputeFMM function calculates dest = op1 * op2 in the fast modular multiplication representation.",
      "parameters": [
        "dest: Pointer to TEE_BigIntFMM to hold the result op1 * op2 in the fast modular multiplication representation.",
        "op1: Pointer to the first operand.",
        "op2: Pointer to the second operand.",
        "n: Pointer to the modulus.",
        "context: Pointer to a context previously initialized using TEE_BigIntInitFMMContext1."
      ],
      "return": []
    }
  ],
  "HMPI_FUNCTION": [
    {
      "name": "MPI_Abort",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Abort(MPI_Comm comm, int errorcode)",
      "description": "This routine makes a \"best attempt\" to abort all tasks in the group of comm.",
      "input_parameters": [
        "comm: Communicator of tasks to abort.",
        "errorcode: Error code to return to invoking environment."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Accumulate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)",
      "description": "MPI_Accumulate is a function used for one-sided MPI communication that adds the contents of the origin buffer (as defined by origin_addr, origin_count, and origin_datatype) to the buffer specified by the arguments target_count and target_datatype, at offset target_disp, in the target window specified by target_rank and win, using the operation op. The target window can only be accessed by processes within the same node. This is similar to MPI_Put, except that data is combined into the target area instead of overwriting it.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "origin_count: Number of entries in buffer (nonnegative integer).",
        "origin_datatype: Data type of each buffer entry (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "op: Reduce operation (handle).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Address",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Address(void *location, MPI_Aint *address)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Get_address instead.",
      "input_parameters": [
        "location: Location in caller memory (choice)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "address: Address of location (integer)."
      ]
    },
    {
      "name": "MPI_Add_error_class",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Add_error_class(int *errorclass)",
      "description": "The function MPI_Add_error_class creates a new, local errorclass.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "errorclass: New error class (integer)."
      ]
    },
    {
      "name": "MPI_Add_error_code",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Add_error_code(int errorclass, int *errorcode)",
      "description": "Creates a new error code associated with errorclass and returnsits value in errorcode.",
      "input_parameters": [
        "errorclass: MPI error class (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errorcode: Error code returned by an MPI routine or an MPI error class (integer)."
      ]
    },
    {
      "name": "MPI_Add_error_string",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Add_error_string(int errorcode, const char *string)",
      "description": "This routine associates an error string with an error code orclass.",
      "input_parameters": [
        "errorcode: MPI error class, or an error code returned by an MPI routine (integer).",
        "string: Text that corresponds to the error code or class (string)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Aint_add",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Aint MPI_Aint_add(MPI_Aint base, MPI_Aint disp)",
      "description": "MPI_Aint_add produces a new MPI_Aint value that is equivalent to the sum ofthe base and disp arguments, where base representsa base address returned by a call to MPI_Get_address anddisp represents a signed integer displacement.",
      "input_parameters": [
        "base: Base address (integer).",
        "disp: Displacement (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Aint_diff",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Aint MPI_Aint_diff(MPI_Aint addr1, MPI_Aint addr2)",
      "description": "MPI_Aint_diff produces a new MPI_Aint value that is equivalentto the difference between addr1 and addr2 arguments, whereaddr1 and addr2 represent addresses returned by calls toMPI_Get_address.",
      "input_parameters": [
        "addr1: Minuend address (integer).",
        "addr2: Subtrahend address (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Allgather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Allgather is similar to MPI_Gather, except that all processes receive the result, instead of just the root. In other words, all processes contribute to the result, and all processes receive the result.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvbuf: Starting address of recv buffer (choice).",
        "recvcount: Number of elements received from any process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Allgatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Allgatherv is similar to MPI_Allgather in that all processes gather data from all other processes, except that each process can send a different amount of data.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Integer array (of length group size) containing the number of elements that are received from each process.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to recvbuf) at which to place the incoming data from process i.",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Alloc_mem",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)",
      "description": "MPI_Alloc_mem allocates size bytes of memory.",
      "input_parameters": [
        "size: Size of memory segment in bytes (nonnegative integer).",
        "info: Info argument (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "baseptr: Pointer to beginning of memory segment allocated."
      ]
    },
    {
      "name": "MPI_Allreduce",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)",
      "description": "Same as MPI_Reduce except that the result appears in the receive buffer of all the group members.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Datatype of elements of send buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Alltoall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Alltoall is a collective operation in which all processes send the same amount of data to each other, and receive the same amount of data from each other. The operation of this routine can be represented as follows, where each process performs 2n (n being the number of processes in communicator comm) independent point-to-point communications (including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements to send to each process (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements to receive from each process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator over which data is to be exchanged (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Alltoallv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Alltoallv is a generalized collective operation in which allprocesses send data to and receive data from all other processes. Itadds flexibility to MPI_Alltoall by allowing the user to specify datato send and receive vector-style (via a displacement and elementcount). The operation of this routine can be thought of as follows,where each process performs 2n (n being the number of processes incommunicator comm) independent point-to-point communications(including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to rank i.",
        "sdispls: Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype) from which to send data to rank i.",
        "sendtype: Datatype of send buffer elements.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from rank j.",
        "rdispls: Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to which data from rank j should be written.",
        "recvtype: Datatype of receive buffer elements.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Alltoallw",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm)",
      "description": "MPI_Alltoallw is a generalized collective operation in which allprocesses send data to and receive data from all other processes. Itadds flexibility to MPI_Alltoallv by allowing the user to specify thedatatype of individual data blocks (in addition to displacement andelement count). Its operation can be thought of in the following way,where each process performs 2n (n being the number of processes incommunicator comm) independent point-to-point communications(including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to rank i.",
        "sdispls: Integer array, where entry i specifies the displacement (in bytes, offset from sendbuf) from which to send data to rank i.",
        "sendtypes: Datatype array, where entry i specifies the datatype to use when sending data to rank i.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from rank j.",
        "rdispls: Integer array, where entry j specifies the displacement (in bytes, offset from recvbuf) to which data from rank j should: be written.",
        "recvtypes: Datatype array, where entry j specifies the datatype to use when receiving data from rank j.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Attr_delete",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Attr_delete(MPI_Comm comm, int keyval)",
      "description": "Note that use of this routine is deprecated as of MPI-2, andwas deleted in MPI-3. Please use MPI_Comm_delete_attr. Thisfunction does not have a C++ or mpi_f08 binding.",
      "input_parameters": [
        "comm: Communicator to which attribute is attached (handle).",
        "keyval: The key value of the deleted attribute (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Attr_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag)",
      "description": "Note that use of this routine is deprecated as of MPI-2, andwas deleted in MPI-3. Please use MPI_Comm_get_attr. Thisfunction does not have a C++ or mpi_f08 binding.",
      "input_parameters": [
        "comm: Communicator to which attribute is attached (handle).",
        "keyval: Key value (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "attribute_val: Attribute value, unless flag = false.",
        "flag: True if an attribute value was extracted; false if no attribute is associated with the key."
      ]
    },
    {
      "name": "MPI_Attr_put",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val)",
      "description": "Note that use of this routine is deprecated as of MPI-2, andwas deleted in MPI-3. Please use MPI_Comm_set_attr. Thisfunction does not have a C++ or mpi_f08 binding.",
      "input_parameters": [
        "comm: Communicator to which attribute will be attached (handle).",
        "keyval: Key value, as returned by MPI_KEYVAL_CREATE (integer).",
        "attribute_val: Attribute value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Barrier",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Barrier(MPI_Comm comm)",
      "description": "An MPI barrier completes after all group members have entered thebarrier.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Bcast",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)",
      "description": "MPI_Bcast broadcasts a message from the process with rank root to all processes of the group, itself included. It is called by all members of group using the same arguments for comm, root. On return, the contents of root's communication buffer has been copied to all processes.",
      "input_parameters": [],
      "input_output_parameters": [
        "buffer: Starting address of buffer (choice).",
        "count: Number of entries in buffer (integer).",
        "datatype: Data type of buffer (handle).",
        "root: Rank of broadcast root (integer).",
        "comm: Communicator (handle)."
      ],
      "output_parameters": [
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Bsend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)",
      "description": "MPI_Bsend performs a buffered-mode, blocking send.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of entries in send buffer (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Bsend_init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Creates a persistent communication request for a buffered mode send, and binds to it all the arguments of a send operation.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements sent (integer).",
        "datatype: Type of each element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Buffer_attach",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Buffer_attach(void *buf, int size)",
      "description": "Provides to MPI a buffer in the user's memory to be used for buffering outgoing messages. The buffer is used only by messages sent in buffered mode. Only one buffer can be attached to a process at a time.",
      "input_parameters": [
        "buf: Initial buffer address (choice).",
        "size: Buffer size, in bytes (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Buffer_detach",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Buffer_detach(void *buf, int *size)",
      "description": "Detach the buffer currently associated with MPI. The call returns the address and the size of the detached buffer. This operation will block until all messages currently in the buffer have been transmitted. Upon return of this function, the user may reuse or deallocate the space taken by the buffer.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial buffer address (choice).",
        "size: Buffer size, in bytes (integer)."
      ]
    },
    {
      "name": "MPI_Cancel",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cancel(MPI_Request *request)",
      "description": "The MPI_Cancel operation allows pending communications to be canceled. This is required for cleanup. Posting a send or a receive ties up user resources (send or receive buffers), and a cancel may be needed to free these resources gracefully.",
      "input_parameters": [
        "request: Communication request (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Cartdim_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cartdim_get(MPI_Comm comm, int *ndims)",
      "description": "MPI_Cartdim_get returns the number of dimensions of the Cartesian structure.",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "ndims: Number of dimensions of the Cartesian structure (integer)."
      ]
    },
    {
      "name": "MPI_Cart_coords",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[])",
      "description": "MPI_Cart_coords provies a mapping of ranks to Cartesian coordinates.",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle).",
        "rank: Rank of a process within group of comm (integer).",
        "maxdims: Length of vector coords in the calling program (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "coords: Integer array (of size ndims,which was defined by MPI_Cart_create call) containing the Cartesian coordinates of specified process (integer)."
      ]
    },
    {
      "name": "MPI_Cart_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[], const int periods[], int reorder, MPI_Comm *comm_cart)",
      "description": "MPI_Cart_create returns a handle to a new communicator to which the Cartesian topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes (possibly so as to choose a good embedding of the virtual topology onto the physical machine). If the total size of the Cartesian grid is smaller than the size of the group of comm, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Comm_split. The call is erroneous if it specifies a grid that is larger than the group size.",
      "input_parameters": [
        "comm_old: Input communicator (handle).",
        "ndims: Number of dimensions of Cartesian grid (integer).",
        "dims: Integer array of size ndims specifying the number of processes in each: dimension.",
        "periods: Logical array of size ndims specifying whether the grid is periodic (true) or not (false) in each dimension.",
        "reorder: Ranking may be reordered (true) or not (false) (logical)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_cart: Communicator with new Cartesian topology (handle)."
      ]
    },
    {
      "name": "MPI_Cart_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], int periods[], int coords[])",
      "description": "The functions MPI_Cartdim_get and MPI_Cart_get return the Cartesian topology information that was associated with a communicator by MPI_Cart_create.",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle).",
        "maxdims: Length of vectors dims, periods, and coords in the calling program (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "dims: Number of processes for each Cartesian dimension (array of integers).",
        "periods: Periodicity (true/false) for each Cartesian dimension (array of logicals).",
        "coords: Coordinates of calling process in Cartesian structure (array of integers)."
      ]
    },
    {
      "name": "MPI_Cart_map",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[], const int periods[], int *newrank)",
      "description": "MPI_Cart_map and MPI_Graph_map can be used to implement all other topology functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI.",
      "input_parameters": [
        "comm: Input communicator (handle).",
        "ndims: Number of dimensions of Cartesian structure (integer).",
        "dims: Integer array of size ndims specifying the number of processes in each: coordinate direction.",
        "periods: Logical array of size ndims specifying the periodicity specification in each coordinate direction."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newrank: Reordered rank of the calling process; MPI_UNDEFINED if calling process does not belong to grid (integer)."
      ]
    },
    {
      "name": "MPI_Cart_rank",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_rank(MPI_Comm comm, int coords[], int *rank)",
      "description": "For a process group with Cartesian structure, the function MPI_Cart_ranktranslates the logical process coordinates to process ranks as they are used by the point-to-point routines. For dimension i with periods(i) = true, if the coordinate, coords(i), is out of range, that is, coords(i) < 0 or coords(i) >= dims(i), it is shifted back to the interval 0 =< coords(i) < dims(i) automatically. Out-of-range coordinates are erroneous for nonperiodic dimensions.",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle).",
        "coords: Integer array (of size ndims, which was defined by MPI_Cart_create call) specifying the Cartesian coordinates of a process."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "rank: Rank of specified process (integer)."
      ]
    },
    {
      "name": "MPI_Cart_shift",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest)",
      "description": "If the process topology is a Cartesian structure, an MPI_Sendrecv operation is likely to be used along a coordinate direction to perform a shift of data. As input, MPI_Sendrecv takes the rank of a source process for the receive, and the rank of a destination process for the send. If the function MPI_Cart_shift is called for a Cartesian process group, it provides the calling process with the above identifiers, which then can be passed to MPI_Sendrecv. The user specifies the coordinate direction and the size of the step (positive or negative). The function is local.",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle).",
        "direction: Coordinate dimension of shift (integer).",
        "disp: Displacement ( > 0: upward shift, < 0: downward shift) (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "rank_source: Rank of source process (integer).",
        "rank_dest: Rank of destination process (integer)."
      ]
    },
    {
      "name": "MPI_Cart_sub",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *comm_new)",
      "description": "If a Cartesian topology has been created with MPI_Cart_create, the function MPI_Cart_sub can be used to partition the communicator group into subgroups that form lower-dimensional Cartesian subgrids, and to build for each subgroup a communicator with the associated subgrid Cartesian topology. (This function is closely related to MPI_Comm_split.)",
      "input_parameters": [
        "comm: Communicator with Cartesian structure (handle).",
        "remain_dims: The ith entry of remain_dims specifies whether the ith dimension is kept in the subgrid (true) or is dropped (false) (logical vector)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_new: Communicator containing the subgrid that includes the calling process (handle)."
      ]
    },
    {
      "name": "MPI_Close_port",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Close_port(const char *port_name)",
      "description": "MPI_Close_port releases the network address represented by port_name.",
      "input_parameters": [
        "port_name: A port (string)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_accept",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm)",
      "description": "MPI_Comm_accept establishes communication with a client. It is collective over the calling communicator. It returns an intercommunicator that allows communication with the client, after the client has connected with the MPI_Comm_accept function using the MPI_Comm_connect function.",
      "input_parameters": [
        "port_name: Port name (string, used only on root).",
        "info: Options given by root for the accept (handle, used only on root). No options currently supported.",
        "root: Rank in comm of root node (integer).",
        "comm: Intracommunicator over which call is collective (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: Intercommunicator with client as remote group (handle)"
      ]
    },
    {
      "name": "MPI_Comm_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Comm_c2f(MPI_Comm comm)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_call_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode)",
      "description": "This function invokes the error handler assigned to the communicatorcomm with the supplied error code errorcode. If the errorhandler was successfully called, the process is not aborted, and theerror handler returns, this function returns MPI_SUCCESS.",
      "input_parameters": [
        "comm: communicator with error handler (handle).",
        "errorcode: error code (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_compare",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result)",
      "description": "MPI_IDENT results if and only if comm1 and comm2 are handles for the same object (identical groups and same contexts). MPI_CONGRUENT results if the underlying groups are identical in constituents and rank order; these communicators differ only by context. MPI_SIMILAR results of the group members of both communicators are the same but the rank order differs. MPI_UNEQUAL results otherwise.",
      "input_parameters": [
        "comm1: Comm1 (handle).",
        "comm2: Comm2 (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "result: Result of comparison (integer)."
      ]
    },
    {
      "name": "MPI_Comm_connect",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm)",
      "description": "MPI_Comm_connect establishes communication with a server specified by port_name. It is collective over the calling communicator and returns an intercommunicator in which the remote group participated in an MPI_Comm_accept. The MPI_Comm_connect call must only be called after the MPI_Comm_accept call has been made by the MPI job acting as the server.",
      "input_parameters": [
        "port_name: Port name (string, used only on root).",
        "info: Options given by root for the connect (handle, used only on root). No options currently supported.",
        "root: Rank in comm of root node (integer).",
        "comm: Intracommunicator over which call is collective (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: Intercommunicator with client as remote group (handle)"
      ]
    },
    {
      "name": "MPI_Comm_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)",
      "description": "This function creates a new communicator newcomm with communicationgroup defined by group and a new context. The function setsnewcomm to a new communicator that spans all the processes thatare in the group. It sets newcomm to MPI_COMM_NULL forprocesses that are not in the group.Each process must call with a group argument that is a subgroupof the group associated with comm; this could beMPI_GROUP_EMPTY. The processes may specify different values for thegroup argument. If a process calls with a non-empty group,then all processes in that group must call the function with the samegroup as argument, that is: the same processes in the sameorder. Otherwise the call is erroneous.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "group: Group, which is a subset of the group of comm (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: New communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_create_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function, MPI_Errhandler *errhandler)",
      "description": "MPI_Comm_create_errhandler creates an error handler that can be attached to communicators. This function is identical to MPI_Errhandler_create, the use of which is deprecated.",
      "input_parameters": [
        "function: User-defined error handling procedure (function)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler (handle)."
      ]
    },
    {
      "name": "MPI_Comm_create_group",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm)",
      "description": "MPI_Comm_create_group is similar to MPI_Comm_create; however,MPI_Comm_create must be called by all processes in the group ofcomm, whereas MPI_Comm_create_group must be called by all processes in group,which is a subgroup of the group of comm. In addition, MPI_Comm_create_grouprequires that comm is an intracommunicator. MPI_Comm_create_group returns a newintracommunicator, newcomm, for which the group argument defines the communicationgroup. No cached information propagates from comm to newcomm.Each process must provide a group argument that is a subgroup of the group associated with comm;this could be MPI_GROUP_EMPTY. If a non-empty group is specified, then all processes in thatgroup must call the function, and each of these processes must provide the same arguments,including a group that contains the same members with the same ordering. Otherwisethe call is erroneous. If the calling process is a member of the group given as the groupargument, then newcomm is a communicator with group as its associated group. If thecalling process is not a member of group, e.g., group is MPI_GROUP_EMPTY, then the callis a local operation and MPI_COMM_NULL is returned as newcomm.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "group: Group, which is a subset of the group of comm (handle).",
        "tag: Tag (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: New communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_create_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function*comm_copy_attr_fn, MPI_Comm_delete_attr_function*comm_delete_attr_fn, int *comm_keyval, void *extra_state)",
      "description": "This function replaces MPI_Keyval_create, the use of which is deprecated. The C binding is identical. The Fortran binding differs in that extra_state is an address-sized integer. Also, the copy and delete callback functions have Fortran bindings that are consistent with address-sized attributes.",
      "input_parameters": [
        "comm_copy_attr_fn: Copy callback function for comm_keyval (function).",
        "comm_delete_attr_fn: Delete callback function for comm_keyval (function).",
        "extra_state: Extra state for callback functions."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_keyval: Key value for future access (integer)."
      ]
    },
    {
      "name": "MPI_Comm_delete_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval)",
      "description": "MPI_Comm_delete_attr deletes an attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the comm_keyval was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS.Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked.",
      "input_parameters": [
        "comm_keyval: Key value (integer)."
      ],
      "input_output_parameters": [
        "comm: Communicator from which the attribute is deleted (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_disconnect",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_disconnect(MPI_Comm *comm)",
      "description": "MPI_Comm_disconnect waits for all pending communication on comm to complete internally, deallocates the communicator object, and sets the handle to MPI_COMM_NULL. It is a collective operation.",
      "input_parameters": [],
      "input_output_parameters": [
        "comm: Communicator (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_dup",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)",
      "description": "MPI_Comm_dup duplicates the existing communicator comm with associated keyvalues. For each key value, the respective copy callback function determines the attribute value associated with this key in the new communicator; one particular action that a copy callback may take is to delete the attribute from the new communicator. Returns in newcomm a new communicator with the same group, any copied cached information, but a new context (see Section 5.7.1 of the MPI-1 Standard, \"Functionality\").",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: Copy of comm (handle)."
      ]
    },
    {
      "name": "MPI_Comm_dup_with_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm)",
      "description": "MPI_Comm_dup_with_info acts exactly like MPI_Comm_dup except that theinfo hints associated with the communicator comm are not duplicated in newcomm. Thehints provided by the argument info are associated with the output communicator newcomminstead.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "info: Info argument (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: Copy of comm (handle)."
      ]
    },
    {
      "name": "MPI_Comm_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Comm MPI_Comm_f2c(MPI_Fint comm)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_free(MPI_Comm *comm)",
      "description": "This operation marks the communicator object for deallocation. The handle is set to MPI_COMM_NULL. Any pending operations that use this communicator will complete normally; the object is actually deallocated only if there are no other active references to it. This call applies to intracommunicators and intercommunicators. Upon actual deallocation, the delete callback functions for all cached attributes (see Section 5.7 in the MPI-1 Standard, \"Caching\") are called in arbitrary order.",
      "input_parameters": [
        "comm: Communicator to be destroyed (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_free_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_free_keyval(int *comm_keyval)",
      "description": "MPI_Comm_free_keyval frees an extant attribute key. This function sets the value of keyval to MPI_KEYVAL_INVALID. Note that it is not erroneous to free an attribute key that is in use, because the actual free does not transpire until after all references (in other communicators on the process) to the key have been freed. These references need to be explicitly freed by the program, either via calls to MPI_Comm_delete_attr that free one attribute instance, or by calls to MPI_Comm_free that free all attribute instances associated with the freed communicator.",
      "input_parameters": [],
      "input_output_parameters": [
        "comm_keyval"
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_get_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void *attribute_val, int *flag)",
      "description": "MPI_Comm_get_attr retrieves an attribute value by key. The call is erroneous if there is no key with value keyval. On the other hand, the call is correct if the key value exists, but no attribute is attached on comm for that key; in that case, the call returns flag = false. In particular, MPI_KEYVAL_INVALID is an erroneous key value.",
      "input_parameters": [
        "comm: Communicator to which the attribute is attached (handle).",
        "comm_keyval: Key value (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "attribute_val: Attribute value, unless flag = false.",
        "flag: False if no attribute is associated with the key (logical)."
      ]
    },
    {
      "name": "MPI_Comm_get_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler)",
      "description": "MPI_Comm_get_errhandler retrieves the error handler currently associated with a communicator. This call is identical to MPI_Errhandler_get, the use of which is deprecated.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: New error handler for communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used)",
      "description": "MPI_Comm_get_info returns a new info object containing the hints ofthe communicator associated with comm.",
      "input_parameters": [
        "comm: Communicator from which to receive active info hints"
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "info_used: New info object returned with all active hints on this communicator."
      ]
    },
    {
      "name": "MPI_Comm_get_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen)",
      "description": "MPI_Comm_get_name returns the last name that was previously associated with the given communicator. The name may be set and retrieved from any language. The same name will be returned independent of the language used. comm_name should be allocated so that it can hold a resulting string of length MPI_MAX_OBJECT_NAME characters. MPI_Comm_get_name returns a copy of the set name in comm_name.",
      "input_parameters": [
        "comm: Communicator the name of which is to be returned (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_name: Name previously stored on the communicator, or an empty string if no such name exists (string).",
        "resultlen: Length of returned name (integer)."
      ]
    },
    {
      "name": "MPI_Comm_get_parent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_get_parent(MPI_Comm *parent)",
      "description": "If a process was started with MPI_Comm_spawn or MPI_Comm_spawn_multiple, MPI_Comm_get_parent returns the \"parent\" intercommunicator of the current process. This parent intercommunicator is created implicitly inside of MPI_Init and is the same intercommunicator returned by the spawn call made in the parents.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "parent: The parent communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_group",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)",
      "description": "If the communicator is an intercommunicator (enables communication between two groups of processes), this function returns the local group. To return the remote group, use the MPI_Comm_remote_group function.",
      "input_parameters": [
        "comm: Communicator."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "group: Group in communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_idup",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request)",
      "description": "MPI_Comm_idup starts the nonblocking duplication of an existing communicator comm with associated keyvalues. For each key value, the respective copy callback function determines the attribute value associated with this key in the new communicator; one particular action that a copy callback may take is to delete the attribute from the new communicator. Returns in newcomm a new communicator with the same group, any copied cached information, but a new context (see Section 5.7.1 of the MPI-1 Standard, \"Functionality\"). The communicator returned in newcomm will not be available until the request is complete.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: Copy of comm (handle).",
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Comm_join",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_join(int fd, MPI_Comm *intercomm)",
      "description": "MPI_Comm_join creates an intercommunicator from the union of two MPIprocesses that are connected by a socket. fd is a filedescriptor representing a socket of type SOCK_STREAM (a two-wayreliable byte-stream connection). Nonblocking I/O and asynchronousnotification via SIGIO must not be enabled for the socket. The socketmust be in a connected state, and must be quiescent when MPI_Comm_joinis called.",
      "input_parameters": [
        "fd: socket file descriptor (socket)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "intercomm: Intercommunicator between processes (handle)."
      ]
    },
    {
      "name": "MPI_Comm_rank",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_rank(MPI_Comm comm, int *rank)",
      "description": "This function gives the rank of the process in theparticular communicator's group. It is equivalent to accessing thecommunicator's group with MPI_Comm_group, computing the rank using MPI_Group_rank, and then freeing the temporary group via MPI_Group_free.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "rank: Rank of the calling process in group of comm (integer)."
      ]
    },
    {
      "name": "MPI_Comm_remote_group",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group)",
      "description": "MPI_Comm_remote_group accesses the remote group associated with an intercommunicator.",
      "input_parameters": [
        "comm: Communicator."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "group: Remote group of communicator."
      ]
    },
    {
      "name": "MPI_Comm_remote_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_remote_size(MPI_Comm comm, int *size)",
      "description": "MPI_Comm_remote_size determines the size of the remote group associated with an intercommunicator.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Number of processes in the remote group of comm (integer)."
      ]
    },
    {
      "name": "MPI_Comm_set_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val)",
      "description": "MPI_Comm_set_attr stores the stipulated attribute value attribute_val for subsequent retrieval by MPI_Comm_get_attr. If the value is already present, then the outcome is as if MPI_Comm_delete_attr was first called to delete the previous value (and the callback function delete_fn was executed), and a new value was next stored. The call is erroneous if there is no key with value comm_keyval; in particular MPI_KEYVAL_INVALID is an erroneous key value. The call will fail if the delete_fn function returned an error code other than MPI_SUCCESS.",
      "input_parameters": [
        "comm_keyval: Key value (integer).",
        "attribute_val: Attribute value."
      ],
      "input_output_parameters": [
        "comm: Communicator from which attribute will be attached (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_set_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler)",
      "description": "MPI_Comm_set_errhandler attaches a new error handler to a communicator. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Comm_create_errhandler. This call is identical to MPI_Errhandler_set, the use of which is deprecated.",
      "input_parameters": [],
      "input_output_parameters": [
        "comm: Communicator (handle)."
      ],
      "output_parameters": [
        "errhandler: New error handler for communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_set_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info)",
      "description": "MPI_COMM_SET_INFO sets new values for the hints of the communicatorassociated with comm.",
      "input_parameters": [
        "comm: Communicator on which to set info hints",
        "info: Info object containing hints to be set onI comm"
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_set_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name)",
      "description": "MPI_Comm_set_name allows a user to associate a name string with a communicator. The character string that is passed to MPI_Comm_set_name is saved inside the MPI library (so it can be freed by the caller immediately after the call, or allocated on the stack). Leading spaces in name are significant, but trailing ones are not.",
      "input_parameters": [
        "comm_name: Character string to be used as the identifier for the communicator (string)."
      ],
      "input_output_parameters": [
        "comm: Communicator whose identifier is to be set (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Comm_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_size(MPI_Comm comm, int *size)",
      "description": "This function indicates the number of processes involved in acommunicator. For MPI_COMM_WORLD, it indicates the total number ofprocesses available. This function is equivalent to accessing thecommunicator's group with MPI_Comm_group, computing the size usingMPI_Group_size, and then freeing the temporary group viaMPI_Group_free. If the communicator is an inter-communicator (enablescommunication between two groups), this function returns the size ofthe local group. To return the size of the remote group, use theMPI_Comm_remote_size function.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Number of processes in the group of comm (integer)."
      ]
    },
    {
      "name": "MPI_Comm_spawn",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[])",
      "description": "MPI_Comm_spawn tries to start maxprocs identical copies of the MPI program specified by command, establishing communication with them and returning an intercommunicator. The spawned processes are referred to as children. The children have their own MPI_COMM_WORLD, which is separate from that of the parents. MPI_Comm_spawn is collective over comm, and also may not return until MPI_Init has been called in the children. Similarly, MPI_Init in the children may not return until all parents have called MPI_Comm_spawn. In this sense, MPI_Comm_spawn in the parents and MPI_Init in the children form a collective operation over the union of parent and child processes. The intercommunicator returned by MPI_Comm_spawn contains the parent processes in the local group and the child processes in the remote group. The ordering of processes in the local and remote groups is the same as the as the ordering of the group of the comm in the parents and of MPI_COMM_WORLD of the children, respectively. This intercommunicator can be obtained in the children through the function MPI_Comm_get_parent.",
      "input_parameters": [
        "command: Name of program to be spawned (string, significant only at root).",
        "argv: Arguments to command (array of strings, significant only at root).",
        "maxprocs: Maximum number of processes to start (integer, significant only at root).",
        "info: A set of key-value pairs telling the runtime system where and how to start the processes (handle, significant only at root).",
        "root: Rank of process in which previous arguments are examined (integer).",
        "comm: Intracommunicator containing group of spawning processes (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "intercomm: Intercommunicator between original group and the newly spawned group (handle).",
        "array_of_errcodes: One code per process (array of integers)."
      ]
    },
    {
      "name": "MPI_Comm_spawn_multiple",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[], const int array_of_maxprocs[], const MPI_Infoarray_of_info[], int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[])",
      "description": "MPI_Comm_spawn_multiple is identical to MPI_Comm_spawn(3) except thatit can specify multiple executables. The first argument, count,indicates the number of executables. The next three arguments arearrays of the corresponding arguments in MPI_Comm_spawn(3). The nextargument, array_of_info, is an array of info arguments, onefor each executable. See the INFO ARGUMENTS section for more information.",
      "input_parameters": [
        "count: Number of commands (positive integer, significant to MPI only at root -- see NOTES).",
        "array_of_commands: Programs to be executed (array of strings, significant only at root).",
        "array_of_argv: Arguments for commands (array of array of strings, significant only at root).",
        "array_of_maxprocs: Maximum number of processes to start for each command (array of integers, significant only at root).",
        "array_of_info: Info objects telling the runtime system where and how to start processes (array of handles, significant only at root).",
        "root: Rank of process in which previous arguments are examined (integer).",
        "comm: Intracommunicator containing group of spawning processes (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "intercomm: Intercommunicator between original group and the newly spawned group (handle).",
        "array_of_errcodes: One code per process (array of integers)."
      ]
    },
    {
      "name": "MPI_Comm_split",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm)",
      "description": "This function partitions the group associated with comm into disjoint subgroups, one for each value of color. Each subgroup contains all processes of the same color. Within each subgroup, the processes are ranked in the order defined by the value of the argument key, with ties broken according to their rank in the old group. A new communicator is created for each subgroup and returned in newcomm. A process may supply the color value MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL. This is a collective call, but each process is permitted to provide different values for color and key.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "color: Control of subset assignment (nonnegative integer).",
        "key: Control of rank assignment (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: New communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_split_type",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm)",
      "description": "This function partitions the group associated with comm into disjoint subgroups, based onthe type specied by split_type. Each subgroup contains all processes of the same type.Within each subgroup, the processes are ranked in the order defined by the value of theargument key, with ties broken according to their rank in the old group. A new communicatoris created for each subgroup and returned in newcomm. This is a collective call;all processes must provide the same split_type, but each process is permitted to providedifferent values for key. An exception to this rule is that a process may supply the typevalue MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "split_type: Type of processes to be grouped together (integer).",
        "key: Control of rank assignment (integer).",
        "info: Info argument (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newcomm: New communicator (handle)."
      ]
    },
    {
      "name": "MPI_Comm_test_inter",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Comm_test_inter(MPI_Comm comm, int *flag)",
      "description": "This local routine allows the calling process to determine the type of a communicator. It returns true for an intercommunicator, false for an intracommunicator.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag (Logical.)"
      ]
    },
    {
      "name": "MPI_Compare_and_swap",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win)",
      "description": "This function compares one element of type datatype in the compare buffer compare_addr with the buffer at offset target_disp in the target window specified by target_rank and win and replaces the value at the target with the value in the origin buffer origin_addr if the compare buffer and the target buffer are identical.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "compare_addr: Initial address of compare buffer (choice).",
        "result_addr: Initial address of result buffer (choice).",
        "datatype: Data type of the entry in origin, result, and target buffers (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Dims_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Dims_create(int nnodes, int ndims, int dims[])",
      "description": "For Cartesian topologies, the function MPI_Dims_create helps the user select a balanced distribution of processes per coordinate direction, depending on the number of processes in the group to be balanced and optional constraints that can be specified by the user. One use is to partition all the processes (the size of MPI_COMM_WORLD's group) into an n-dimensional topology.",
      "input_parameters": [
        "nnodes: Number of nodes in a grid (integer).",
        "ndims: Number of Cartesian dimensions (integer).",
        " IN/OUT PARAMETER"
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Dist_graph_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[], const int degrees[], const int destinations[], const int weights[], MPI_Info info, int reorder, MPI_Comm *comm_dist_graph)",
      "description": "MPI_Dist_graph_create creates a new communicator comm_dist_graph with distrubutedgraph topology and returns a handle to the new communicator.",
      "input_parameters": [
        "comm_old: Input communicator without topology (handle).",
        "n: Number of source nodes for which this process specifies edges (non-negative integer).",
        "sources: Array containing the n source nodes for which this process species edges (array of non-negative integers).",
        "degrees: Array specifying the number of destinations for each source node in the source node array (array of non-negative integers).",
        "destinations: Destination nodes for the source nodes in the source node array (array of non-negative integers).",
        "weights: Weights for source to destination edges (array of non-negative integers).",
        "Hints on optimization and interpretation of weights (handle).",
        "reorder: Ranking may be reordered (true) or not (false) (logical)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_dist_graph: Communicator with distibuted graph topology added (handle)."
      ]
    },
    {
      "name": "MPI_Dist_graph_create_adjacent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, int indegree, const int sources[], const int sourceweights[], int outdegree, const int destinations[], const int destweights[], MPI_Info info, int reorder, MPI_Comm *comm_dist_graph)",
      "description": "MPI_Dist_graph_create_adjacent creats a new communicator comm_dist_graph with distrubutedgraph topology and returns a handle to the new communicator.",
      "input_parameters": [
        "comm_old: Input communicator without topology (handle).",
        "indegree: Size of sources and sourceweights arrays (non-negative integer).",
        "sources: Ranks of processes for which the calling process is a destination (array of non-negative integers).",
        "sourceweights: Weights of the edges into the calling process (array of non-negative integers).",
        "outdegree: Size of destinations and destweights arrays (non-negative integer).",
        "destinations: Ranks of processes for which the calling process is a source (array of non-negative integers).",
        "destweights: Weights of the edges out of the calling process (array of non-negative integers).",
        "Hints on optimization and interpretation of weights (handle).",
        "reorder: Ranking may be reordered (true) or not (false) (logical)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_dist_graph: Communicator with distibuted graph topology added (handle)."
      ]
    },
    {
      "name": "MPI_Dist_graph_neighbors",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[])",
      "description": "MPI_Dist_graph_neighbors returns the source and destination ranks in a distributed graph topologyfor the calling process.",
      "input_parameters": [
        "comm: Communicator with distributed graph topology (handle).",
        "maxindegree: Size of sources and sourceweights arrays (non-negative integer).",
        "maxoutdegree: Size of destinations and destweights arrays (non-negative integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "sources: Processes for which the calling process is a destination (array of non-negative integers).",
        "sourceweights: Weights of the edges into the calling process (array of non-negative integers).",
        "destinations: Processes for which the calling process is a source (array of non-negative integers).",
        "destweights: Weights of the edges out of the calling process (array of non-negative integers)."
      ]
    },
    {
      "name": "MPI_Dist_graph_neighbors_count",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted)",
      "description": "MPI_Dist_graph_neighbors_count and MPI_Graph_neighbors provide adjacency information for a distributed graph topology. MPI_Dist_graph_neighbors_count returns the number of sources and destinations for the calling process.",
      "input_parameters": [
        "comm: Communicator with distributed graph topology (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "indegree: Number of edges into this process (non-negative integer).",
        "outdegree: Number of edges out of this process (non-negative integer).",
        "weighted: False if MPI_UNWEIGHTED was supplied during creation, true otherwise (logical)."
      ]
    },
    {
      "name": "MPI_Errhandler_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Errhandler_create(MPI_Handler_function *function, MPI_Errhandler *errhandler)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Comm_create_errhandler instead.",
      "input_parameters": [
        "function: User-defined error handling procedure."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler (handle)."
      ]
    },
    {
      "name": "MPI_Errhandler_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Errhandler_free(MPI_Errhandler *errhandler)",
      "description": "Marks the error handler associated with errhandler for deallocation and sets errhandler to MPI_ERRHANDLER_NULL. The error handler will be deallocated after all communicators associated with it have been deallocated.",
      "input_parameters": [
        "errhandler: MPI error handler (handle). Set to MPI_ERRHANDLER_NULL on exit."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Errhandler_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Comm_get_errhandler instead.",
      "input_parameters": [
        "comm: Communicator to get the error handler from (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler currently associated with communicator (handle)."
      ]
    },
    {
      "name": "MPI_Errhandler_set",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Comm_set_errhandler instead.",
      "input_parameters": [
        "comm: Communicator to set the error handler for (handle).",
        "errhandler: New MPI error handler for communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Error_class",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Error_class(int errorcode, int *errorclass)",
      "description": "The function MPI_Error_class maps each standard error code (error class) onto itself.",
      "input_parameters": [
        "errorcode: Error code returned by an MPI routine."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errorclass: Error class associated with errorcode."
      ]
    },
    {
      "name": "MPI_Error_string",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Error_string(int errorcode, char *string, int *resultlen)",
      "description": "Returns the error string associated with an error code or class. The argument string must represent storage that is at least MPI_MAX_ERROR_STRING characters long.",
      "input_parameters": [
        "errorcode: Error code returned by an MPI routine or an MPI error class."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "string: Text that corresponds to the errorcode.",
        "resultlen: Length of string."
      ]
    },
    {
      "name": "MPI_Exscan",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)",
      "description": "MPI_Exscan is used to perform an exclusive prefix reduction on datadistributed across the calling processes.",
      "input_parameters": [
        "sendbuf: Send buffer (choice).",
        "count: Number of elements in input buffer (integer).",
        "datatype: Data type of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Fetch_and_op",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win)",
      "description": "Accumulate one element of type datatype from the origin buffer (origin_addr) to the buffer at offset target_disp, in the target window specified by target_rank and win, using the operation op and return in the result buffer result_addr the contents of the target buffer before the accumulation.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "result_addr: Initial address of result buffer (choice).",
        "datatype: Data type of the entry in origin, result, and target buffers (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "op: Reduce operation (handle).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_File_c2f(MPI_File file)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_call_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_call_errhandler(MPI_File fh, int errorcode)",
      "description": "This function invokes the error handler assigned to the file handle fh with the supplied error code errorcode. If the errorhandler was successfully called, the process is not aborted, and theerror handler returns, this function returns MPI_SUCCESS.",
      "input_parameters": [
        "fh: file with error handler (handle).",
        "errorcode: MPI error code (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_close",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_close(MPI_File *fh)",
      "description": "MPI_File_close first synchronizes file state, then closes the fileassociated with fh.",
      "input_parameters": [],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_create_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_create_errhandler(MPI_File_errhandler_function *function, MPI_Errhandler *errhandler)",
      "description": "Registers the user routine function for use as an MPI exception handler. Returns in errhandler a handle to the registered exception handler.",
      "input_parameters": [
        "function: User-defined error handling procedure (function)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler (handle)."
      ]
    },
    {
      "name": "MPI_File_delete",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_delete(const char *filename, MPI_Info info)",
      "description": "MPI_File_delete deletes the file identified by the file namefilename, provided it is not currently open by any process. It is an error to delete the file with MPI_File_delete if some process has it open, but MPI_File_delete does not check this. If the file does not exist, MPI_File_delete returns an error in the class MPI_ERR_NO_SUCH_FILE.",
      "input_parameters": [
        "filename: Name of file to delete (string).",
        "info: Info object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_File MPI_File_f2c(MPI_Fint file)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_get_amode",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_amode(MPI_File fh, int *amode)",
      "description": "MPI_File_get_amode returns, in amode, the access mode associated with the open file.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "amode: File access mode used to open the file (integer)."
      ]
    },
    {
      "name": "MPI_File_get_atomicity",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_atomicity(MPI_File fh, int *flag)",
      "description": "MPI_File_get_atomicity returns the current consistency semantics fordata access operations on the set of file handles created by onecollective MPI_File_open. If flag is true, atomic mode is currently enabled; if flag is false, nonatomic mode is currently enabled.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: true if atomic mode is enabled, false if nonatomic mode is enabled (boolean)."
      ]
    },
    {
      "name": "MPI_File_get_byte_offset",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_byte_offset(MPI_File fh, MPI_Offset offset, MPI_Offset *disp)",
      "description": "MPI_File_get_byte_offset converts an offset specified for the current view to its corresponding displacement value, or absolute byte position, from the beginning of the file. The absolute byte position of offset relative to the current view of fh is returned in disp.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: Offset (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "disp: Absolute byte position of offset (integer)."
      ]
    },
    {
      "name": "MPI_File_get_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_errhandler(MPI_File file, MPI_Errhandler*errhandler)",
      "description": "Returns in errhandler (a handle to) the error handler that is currently associated with file file.",
      "input_parameters": [
        "file: File (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler currently associated with file (handle)."
      ]
    },
    {
      "name": "MPI_File_get_group",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_group(MPI_File fh, MPI_Group *group)",
      "description": "MPI_File_get_group returns a duplicate of the group of the communicatorused to open the file associated with.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "group: Group that opened the file (handle)."
      ]
    },
    {
      "name": "MPI_File_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_info(MPI_File fh, MPI_Info *info_used)",
      "description": "MPI_File_get_info returns a new info object containing all the hints that the system currently associates with the file fh. The current setting of all hints actually used by the system related to this open file is returned in info_used. The user is responsible for freeing info_used via MPI_Info_free.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "info_used: New info object (handle)."
      ]
    },
    {
      "name": "MPI_File_get_position",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_position(MPI_File fh, MPI_Offset *offset)",
      "description": "MPI_File_get_position returns, in offset, the current position of the individual file pointer in etypeunits relative to the current displacement and file type.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "offset: Offset of the individual file pointer (integer)."
      ]
    },
    {
      "name": "MPI_File_get_position_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset)",
      "description": "MPI_File_get_position_shared returns, in offset, the current position of the shared file pointer in etypeunits relative to the current displacement and file type.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "offset: Offset of the shared file pointer (integer)."
      ]
    },
    {
      "name": "MPI_File_get_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_size(MPI_File fh, MPI_Offset *size)",
      "description": "MPI_File_get_size returns, in size, the current size in bytes of the file associated with the file handle fh. Note that the file size returned by Solaris may not represent the number of bytes physically allocated for the file in those cases where all bytes in this file have not been written at least once.",
      "input_parameters": [
        "fh: File handle (handle).",
        "size: Size of the file in bytes (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_get_type_extent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_type_extent(MPI_File fh, MPI_Datatypedatatype, MPI_Aint *extent)",
      "description": "MPI_File_get_type_extent can be used to calculate extent for datatype in the file. The extent is the same for all processes accessing the file associated with fh. If the current view uses a user-defined data representation, MPI_File_get_type_extent uses the dtype_file_extent_fn callback to calculate the extent.",
      "input_parameters": [
        "fh: File handle (handle).",
        "datatype: Data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "extent: Data type extent (integer)."
      ]
    },
    {
      "name": "MPI_File_get_view",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_get_view(MPI_File fh, MPI_Offset *disp, MPI_Datatype *etype, MPI_Datatype *filetype, char *datarep)",
      "description": "The MPI_File_get_view routine returns the process's view of the datain the file. The current values of the displacement, etype, andfiletype are returned in disp, etype,and filetype,respectively.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "disp: Displacement (integer).",
        "etype: Elementary data type (handle).",
        "filetype: File type (handle). See Restrictions, below.",
        "datarep: Data representation (string)."
      ]
    },
    {
      "name": "MPI_File_iread",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iread(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iread is a nonblocking version of MPI_File_read. It attempts to read from the file associated with at the current individual file pointer position maintained by the system in which a total number of countdata items having datatype type are read into the user's buffer buf.",
      "input_parameters": [
        "count: Number of elements in the buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iread_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iread_all(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iread_all is a nonblocking version of MPI_File_read_all. It attempts to read from the file associated with fh at the current individual file pointer position maintained by the system in which a total number of countdata items having datatypetype are read into the user's buffer buf.",
      "input_parameters": [
        "count: Number of elements in the buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iread_at",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iread_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iread_at is the nonblocking version of MPI_File_read_at.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "count: Number of elements in the buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of the buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iread_at_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iread_at_all is the nonblocking version of MPI_File_read_at_all.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "count: Number of elements in the buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of the buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iread_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iread_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iread_shared is a nonblocking version of the MPI_File_read_shared interface. It uses the shared file pointer to read files. The order of serialization among the processors is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order among processors.",
      "input_parameters": [
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iwrite",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iwrite(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iwrite is a nonblocking version of the MPI_File_write interface. It attempts to write into the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iwrite_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iwrite_all(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iwrite_all is a nonblocking version of the MPI_File_write_all interface. It attempts to write into the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iwrite_at",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iwrite_at(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iwrite_at is a nonblocking version of MPI_File_write_at. It attempts to write into the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "offset: File offset (integer).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iwrite_at_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Request *request)",
      "description": "MPI_File_iwrite_at_all is a nonblocking version of MPI_File_write_at_all. It attempts to write into the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "offset: File offset (integer).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_iwrite_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_iwrite_shared(MPI_File fh, const void *buf, int count, MPI_Datatypedatatype, MPI_Request *request)",
      "description": "MPI_File_iwrite_shared is a nonblocking routine that uses the shared file pointer to write files. The order of serialization is not deterministic for this noncollective routine, so you need to use other methods of synchronization to impose a particular order.",
      "input_parameters": [
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "request: Request object (handle)."
      ]
    },
    {
      "name": "MPI_File_open",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_open(MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh)",
      "description": "MPI_File_open opens the file identified by the filename.Ifilenameon all processes in the commcommunicator group.",
      "input_parameters": [
        "comm: Communicator (handle).",
        "filename: Name of file to open (string).",
        "amode: File access mode (integer).",
        "info: Info object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "fh: New file handle (handle)."
      ]
    },
    {
      "name": "MPI_File_preallocate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_preallocate(MPI_File fh, MPI_Offset size)",
      "description": "MPI_File_preallocate ensures that storage space is allocated for the first size bytes of the file associated with fh. MPI_File_preallocate can be a very time-consuming operation.",
      "input_parameters": [
        "size: Size to preallocate file, in bytes (integer)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_read",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read attempts to read from the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (integer).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_all(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read_all is a collective routine that attempts to read from the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_all_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, MPI_Datatype datatype)",
      "description": "MPI_File_read_all_begin is the beginning part of a split collective operation that attempts to read from the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice)."
      ]
    },
    {
      "name": "MPI_File_read_all_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status)",
      "description": "MPI_File_read_all_end is the ending part of a split collective operation that stores the number of elements actually read from the file associated with fh(at the current individual file pointer position maintained by the system)into the user's buffer bufin status.",
      "input_parameters": [],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_at",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read_at attempts to read from the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_at_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read_at_all is a collective routine that attempts to read from the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_at_all_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_at_all_begin(MPI_File fh, MPI_Offsetoffset, void *buf, int count, MPI_Datatypedatatype)",
      "description": "MPI_File_read_at_all_begin is the beginning part of a split collective routine that attempts to read from the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype into the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice)."
      ]
    },
    {
      "name": "MPI_File_read_at_all_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_at_all_end(MPI_File fh, void *buf, MPI_Status *status)",
      "description": "MPI_File_read_at_all_end is a split collective routine that stores the number of elements actually read from the file associated with fh in status.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_ordered",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_ordered(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read_ordered is a collective routine. This routine must becalled by all processes in the communicator group associated with thefile handle fh.",
      "input_parameters": [
        "fh: File handle (handle).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (Status)."
      ]
    },
    {
      "name": "MPI_File_read_ordered_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_ordered_begin(MPI_File fh, void *buf, int count, MPI_Datatype datatype)",
      "description": "MPI_File_read_ordered_begin is the beginning part of a split collective, nonblocking routine that must becalled by all processes in the communicator group associated with thefile handle fh.",
      "input_parameters": [
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice)."
      ]
    },
    {
      "name": "MPI_File_read_ordered_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_ordered_end(MPI_File fh, void *buf, MPI_Status *status)",
      "description": "MPI_File_read_ordered_end is the ending part of a split collective routine that must be called by all processes in the communicator group associated with thefile handle fh.",
      "input_parameters": [],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_read_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_read_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_read_shared is a blocking routine that uses the shared file pointer to read files. The order of serialization is not deterministic for this noncollective routine.",
      "input_parameters": [
        "count: Number of elements in buffer (integer)",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "buf: Initial address of buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_seek",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence)",
      "description": "MPI_File_seek updates the individual file pointer according to whence, which could have the following possible values: oMPI_SEEK_SET - The pointer is set to offset. oMPI_SEEK_CUR - The pointer is set to the current pointer position plus offset. oMPI_SEEK_END - The pointer is set to the end of the file plus offset.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "whence: Update mode (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_seek_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence)",
      "description": "MPI_File_seek_shared updates the shared file pointer according to whence, which could have the following possible values: oMPI_SEEK_SET - The pointer is set to offset. oMPI_SEEK_CUR - The pointer is set to the current pointer position plus offset. oMPI_SEEK_END - The pointer is set to the end of the file plus offset.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "whence: Update mode (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_set_atomicity",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_set_atomicity(MPI_File fh, int flag)",
      "description": "The consistency semantics for data-access operations using the set offile handles created by one collective MPI_File_open is set by collectivelycalling MPI_File_set_atomicity. All processes in the group must pass identical values for fh and flag.",
      "input_parameters": [
        "fh: File handle (handle).",
        "flag: true to enable atomic mode, false to enable nonatomic mode (boolean)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_set_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_set_errhandler(MPI_File file, MPI_Errhandler errhandler)",
      "description": "Attaches a new error handler to a file. The error handler must be either a predefined error handler or an error handler created by a call to MPI_File_create_errhandler.",
      "input_parameters": [
        "errhandler: New error handler for file (handle)."
      ],
      "input_output_parameters": [
        "file: File (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_set_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_set_info(MPI_File fh, MPI_Info info)",
      "description": "MPI_File_set_info is a collective routine that sets new values for the hints of the file associated with fh. These hints are set for each file, using the MPI_File_open, MPI_File_delete, MPI_File_set_view, and MPI_File_set_info routines. The opaque info object, which allows you to provide hints for optimization of your code, may be different on each process, but some info entries are required to be the same on all processes: In these cases, they must appear with the same value in each process's info object. See the HINTS section for a list of hints that can be set.",
      "input_parameters": [
        "info: Info object (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_set_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_set_size(MPI_File fh, MPI_Offset size)",
      "description": "MPI_File_set_size resizes the file associated with the file handle fh, truncating UNIX files as necessary. MPI_File_set_size is collective; allprocesses in the group must pass identical values for size.",
      "input_parameters": [
        "fh: File handle (handle).",
        "size: Size to truncate or expand file (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_set_view",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, const char *datarep, MPI_Info info)",
      "description": "The MPI_File_set_view routine changes the process's view of the datain the file -- the beginning of the data accessible in the file throughthat view is set to disp.",
      "input_parameters": [
        "disp: Displacement (integer).",
        "etype: Elementary data type (handle).",
        "filetype: File type (handle). See Restrictions, below.",
        "datarep: Data representation (string).",
        "info: Info object (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_sync",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_sync(MPI_File fh)",
      "description": "Calling MPI_File_sync with fh causes all previous writes to fh by the calling process to be written to permanent storage. If other processes have made updates to permanent storage, then all such updates become visible to subsequent reads of fh by the calling process.",
      "input_parameters": [
        "fh: File handle (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_File_write",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write attempts to write into the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_all(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write_all is a collective routine that attempts to write into the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_all_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_all_begin(MPI_File fh, const void *buf, int count, MPI_Datatype datatype)",
      "description": "MPI_File_write_all_begin is the beginning part of a split collective, nonblocking routine that attempts to write into the file associated with fh(at the current individual file pointer position maintained by the system) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_write_all_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_all_end(MPI_File fh, const void *buf, MPI_Status *status)",
      "description": "MPI_File_write_all_end is the ending part of a split collective routine that stores thenumber of elements actually written into the file associated with fh from the user's buffer bufin status.",
      "input_parameters": [
        "buf: Initial address of buffer (choice)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_at",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write_at attempts to write into the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_at_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write_at_all is a collective routine that attempts to write into the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "fh: File handle (handle).",
        "offset: File offset (integer).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_at_all_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_at_all_begin(MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype)",
      "description": "MPI_File_write_at_all_begin is the beginning part of a split collective, that is, a nonblocking routine that attempts to write into the file associated with fh(at the offsetposition) a total number of countdata items having datatypetype from the user's buffer buf.",
      "input_parameters": [
        "offset: File offset (handle).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_write_at_all_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_at_all_end(MPI_File fh, const void *buf, MPI_Status *status)",
      "description": "MPI_File_write_at_all_end is the ending part of a split collective routine that stores thenumber of elements actually written into the file associated with in status.",
      "input_parameters": [
        "buf: Initial address of buffer (choice)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_ordered",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_ordered(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write_ordered is a collective routine. This routine mustbe called by all processes in the communicator group associated withthe file handle fh.",
      "input_parameters": [
        "fh: File handle (handle).",
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (Status)."
      ]
    },
    {
      "name": "MPI_File_write_ordered_begin",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_ordered_begin(MPI_File fh, const void *buf, int count, MPI_Datatype datatype)",
      "description": "MPI_File_write_ordered_begin is the beginning part of a split collective, nonblocking routine that mustbe called by all processes in the communicator group associated withthe file handle fh.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_File_write_ordered_end",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_ordered_end(MPI_File fh, const void *buf, MPI_Status *status)",
      "description": "MPI_File_write_ordered_end is the ending part of a split collective routine that mustbe called by all processes in the communicator group associated withthe file handle.",
      "input_parameters": [
        "buf: Initial address of buffer (choice)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_File_write_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_File_write_shared(MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status)",
      "description": "MPI_File_write_shared is a blocking routine that uses the shared file pointer to write files. The order of serialization is not deterministic for this noncollective routine.",
      "input_parameters": [
        "buf: Initial address of buffer (choice).",
        "count: Number of elements in buffer (integer).",
        "datatype: Data type of each buffer element (handle)."
      ],
      "input_output_parameters": [
        "fh: File handle (handle)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Finalize",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Finalize()",
      "description": "This routine cleans up all MPI states. Once this routine is called, no MPI routine (not even MPI_Init) may be called, except for MPI_Get_version, MPI_Initialized, and MPI_Finalized. Unless there has been a call to MPI_Abort, you must ensure that all pending communications involving a process are complete before the process calls MPI_Finalize. If the call returns, each process may either continue local computations or exit without participating in further communication with other processes. At the moment when the last process calls MPI_Finalize, all pending sends must be matched by a receive, and all pending receives must be matched by a send.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Finalized",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Finalized(int *flag)",
      "description": "This routine may be used to determine whether MPI has been finalized. It is one of a small number of routines that may be called before MPIis initialized and after MPI has been finalized (MPI_Initialized isanother).",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if MPI was finalized, and false otherwise (logical)."
      ]
    },
    {
      "name": "MPI_Free_mem",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Free_mem(void *base)",
      "description": "MPI_Free_mem frees memory that has been allocated by MPI_Alloc_mem.",
      "input_parameters": [
        "base: Initial address of memory segment allocated by MPI_Alloc_mem (choice)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Gather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)",
      "description": "Each process (root process included) sends the contents of its send buffer to the root process. The root process receives the messages and stores them in rank order. The outcome is as if each of the n processes in the group (including the root process) had executed a call to",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements for any single receive (integer, significant only at root).",
        "recvtype: Datatype of recvbuffer elements (handle, significant only at root).",
        "root: Rank of receiving process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Gatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm)",
      "description": "MPI_Gatherv extends the functionality of MPI_Gather by allowing a varying count of data from each process, since recvcounts is now an array. It also allows more flexibility as to where the data is placed on the root, by providing the new argument, displs.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcounts: Integer array (of length group size) containing the number of elements that: are received from each process (significant only at root).",
        "displs: Integer array (of length group size). Entry i specifies the displacement relative to recvbuf at which to place the incoming data from process i (significant only at root).",
        "recvtype: Datatype of recv buffer elements (significant only at root) (handle).",
        "root: Rank of receiving process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Get",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win)",
      "description": "MPI_Get copies data from the target memory to the origin, similar to MPI_Put, except that the direction of data transfer is reversed. The origin_datatype may not specify overlapping entries in the origin buffer. The target buffer must be contained within the target window, and the copied data must fit, without truncation, in the origin buffer. Only processes within the same node can access the target window.",
      "input_parameters": [
        "origin_addr: Initial address of origin buffer (choice).",
        "origin_count: Number of entries in origin buffer (nonnegative integer).",
        "origin_datatype: Data type of each entry in origin buffer (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from window start to the beginning of the target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Datatype of each entry in target buffer (handle).",
        "win: Window object used for communication (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Get_accumulate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)",
      "description": "MPI_Get_accumulate is a function used for one-sided MPI communication that adds the contents of the origin buffer (as defined by origin_addr, origin_count, and origin_datatype) to the buffer specified by the arguments target_count and target_datatype, at offset target_disp, in the target window specified by target_rank and win, using the operation op. MPI_Get_accumulate returns in the result buffer result_addr the contents of the target buffer before the accumulation.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "origin_count: Number of entries in buffer (nonnegative integer).",
        "origin_datatype: Data type of each buffer entry (handle).",
        "result_addr: Initial address of result buffer (choice).",
        "result_count: Number of entries in result buffer (nonnegative integer).",
        "result_datatype: Data type of each result buffer entry (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "op: Reduce operation (handle).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Get_address",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_address(const void *location, MPI_Aint *address)",
      "description": "MPI_Get_address returns the byte address of a location in memory.",
      "input_parameters": [
        "location: Location in caller memory (choice)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "address: Address of location (integer)."
      ]
    },
    {
      "name": "MPI_Get_count",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count)",
      "description": "Returns the number of entries received. (We count entries, each of type datatype, not bytes.) The datatype argument should match the argument provided by the receive call that set the status variable. (As explained in Section 3.12.5 in the MPI-1 Standard, \"Use of General Datatypes in Communication,\" MPI_Get_count may, in certain situations, return the value MPI_UNDEFINED.)",
      "input_parameters": [
        "status: Return status of receive operation (status).",
        "datatype: Datatype of each receive buffer element (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "count: Number of received elements (integer)."
      ]
    },
    {
      "name": "MPI_Get_elements",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count)",
      "description": "MPI_Get_elements and MPI_Get_elements_x behave differently from MPI_Get_count, which returns the number of \"top-level entries\" received, i.e., the number of \"copies\" of type datatype. MPI_Get_count may return any integer value k, where 0 <= k <= count. If MPI_Get_count returns k, then the number of basic elements received (and the value returned by MPI_Get_elements and MPI_Get_elements_x) is n * k, where n is the number of basic elements in the type map of datatype. If the number of basic elements received is not a multiple of n, that is, if the receive operation has not received an integral number of datatype \"copies,\" then MPI_Get_count returns the value MPI_UNDEFINED. For both functions, if the count parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "status: Return status of receive operation (status).",
        "datatype: Datatype used by receive operation (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "count: Number of received basic elements (integer)."
      ]
    },
    {
      "name": "MPI_Get_elements_x",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Count *count)",
      "description": "MPI_Get_elements and MPI_Get_elements_x behave differently from MPI_Get_count, which returns the number of \"top-level entries\" received, i.e., the number of \"copies\" of type datatype. MPI_Get_count may return any integer value k, where 0 <= k <= count. If MPI_Get_count returns k, then the number of basic elements received (and the value returned by MPI_Get_elements and MPI_Get_elements_x) is n * k, where n is the number of basic elements in the type map of datatype. If the number of basic elements received is not a multiple of n, that is, if the receive operation has not received an integral number of datatype \"copies,\" then MPI_Get_count returns the value MPI_UNDEFINED. For both functions, if the count parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "status: Return status of receive operation (status).",
        "datatype: Datatype used by receive operation (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "count: Number of received basic elements (integer)."
      ]
    },
    {
      "name": "MPI_Get_library_version",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_library_version(char *version, int *resultlen)",
      "description": "This routine returns a string representing the version of the MPI library. The version argument is a character string for maximum flexibility.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "version: A string containing the Open MPI version (string).",
        "resultlen: Length (in characters) of result returned in version (integer)."
      ]
    },
    {
      "name": "MPI_Get_processor_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_processor_name(char *name, int *resultlen)",
      "description": "This routine returns the name of the processor on which it was called at the moment of the call. The name is a character string for maximum flexibility. From this value it must be possible to identify a specific piece of hardware. The argument name must represent storage that is at least MPI_MAX_PROCESSOR_NAME characters long.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "name: A unique specifier for the actual (as opposed to virtual) node.",
        "resultlen: Length (in characters) of result returned in name."
      ]
    },
    {
      "name": "MPI_Get_version",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Get_version(int *version, int *subversion)",
      "description": "Since Open MPI is MPI 3.1 compliant, this function will return a version value of 3 and a subversion value of 1 for this release.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "version: The major version number of the corresponding standard (integer).",
        "subversion: The minor version number of the corresponding standard (integer)."
      ]
    },
    {
      "name": "MPI_Graphdims_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges)",
      "description": "Functions MPI_Graphdims_get and MPI_Graph_get retrieve the graph-topology information that was associated with a communicator by MPI_Graph_create.",
      "input_parameters": [
        "comm: Communicator for group with graph structure (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "nnodes: Number of nodes in graph (integer).",
        "nedges: Number of edges in graph (integer)."
      ]
    },
    {
      "name": "MPI_Graph_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int index[], const int edges[], int reorder, MPI_Comm *comm_graph)",
      "description": "MPI_Graph_create returns a handle to a new communicator to which the graph topology information is attached. If reorder = false then the rank of each process in the new group is identical to its rank in the old group. Otherwise, the function may reorder the processes. If the size, nnodes, of the graph is smaller than the size of the group of comm_old, then some processes are returned MPI_COMM_NULL, in analogy to MPI_Cart_create and MPI_Comm_split. The call is erroneous if it specifies a graph that is larger than the group size of the input communicator.",
      "input_parameters": [
        "comm_old: Input communicator without topology (handle).",
        "nnodes: Number of nodes in graph (integer).",
        "index: Array of integers describing node degrees (see below).",
        "edges: Array of integers describing graph edges (see below).",
        "reorder: Ranking may be reordered (true) or not (false) (logical)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "comm_graph: Communicator with graph topology added (handle)."
      ]
    },
    {
      "name": "MPI_Graph_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges, int index[], int edges[])",
      "description": "Functions MPI_Graphdims_get and MPI_Graph_get retrieve the graph-topology information that was associated with a communicator by MPI_Graph_create.",
      "input_parameters": [
        "comm: Communicator with graph structure (handle).",
        "maxindex: Length of vector index in the calling program (integer).",
        "maxedges: Length of vector edges in the calling program (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "index: Array of integers containing the graph structure (for details see the definition of MPI_Graph_create).",
        "edges: Array of integers containing the graph structure."
      ]
    },
    {
      "name": "MPI_Graph_map",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graph_map(MPI_Comm comm, int nnodes, const int index[], const int edges[], int *newrank)",
      "description": "MPI_Cart_map and MPI_Graph_map can be used to implement all other topology functions. In general they will not be called by the user directly, unless he or she is creating additional virtual topology capability other than that provided by MPI.",
      "input_parameters": [
        "comm: Input communicator (handle).",
        "nnodes: Number of graph nodes (integer).",
        "index: Integer array specifying the graph structure, see MPI_Graph_create.",
        "edges: Integer array specifying the graph structure."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newrank: Reordered rank of the calling process; MPI_UNDEFINED if the calling process does not belong to graph (integer)."
      ]
    },
    {
      "name": "MPI_Graph_neighbors",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, int neighbors[])",
      "description": "Example: Suppose that comm is a communicator with a shuffle-exchange topology. The group has 2^n members. Each process is labeled by a(1), ..., a(n) with a(i) in {0,1}, and has three neighbors: exchange (a(1), ..., a(n)) = a(1), ..., a(n-1), ā(n) (where ā = 1 - a), shuffle (a(1), ..., a(n)) = a(2), ..., a(n), a(1), and unshuffle (a(1), ..., a(n)) = a(n), a(1), ..., a(n-1). The graph adjacency list is illustrated below for n=3.",
      "input_parameters": [
        "comm: Communicator with graph topology (handle).",
        "rank: Rank of process in group of comm (integer).",
        "maxneighbors: Size of array neighbors (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "neighbors: Ranks of processes that are neighbors to specified process (array of integers)."
      ]
    },
    {
      "name": "MPI_Graph_neighbors_count",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors)",
      "description": "MPI_Graph_neighbors_count and MPI_Graph_neighbors provide adjacency information for a general, graph topology. MPI_Graph_neighbors_count returns the number of neighbors for the process signified by rank.",
      "input_parameters": [
        "comm: Communicator with graph topology (handle).",
        "rank: Rank of process in group of comm (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "nneighbors: Number of neighbors of specified process (integer)."
      ]
    },
    {
      "name": "MPI_Grequest_complete",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Grequest_complete(MPI_Request request)",
      "description": "MPI_Grequest_complete informs MPI that the operations represented by the generalized request request are complete. A call to MPI_Wait(request, status) will return, and a call to MPI_Test(request, flag, status) will return flag=true only after a call to MPI_Grequest_complete has declared that these operations are complete.",
      "input_parameters": [],
      "input_output_parameters": [
        "request: Generalized request (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Grequest_start",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Grequest_start(MPI_Grequest_query_function *query_fn, MPI_Grequest_free_function *free_fn, MPI_Grequest_cancel_function *cancel_fn, void *extra_state, MPI_Request *request)",
      "description": "MPI_Grequest_start starts a generalized request and returns a handle to it in request.",
      "input_parameters": [
        "query_fn: Callback function invoked when request status is queried (function).",
        "free_fn: Callback function invoked when request is freed (function).",
        "cancel_fn: Callback function invoked when request is canceled (function).",
        "extra_state: Extra state."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Generalized request (handle)."
      ]
    },
    {
      "name": "MPI_Group_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Group_c2f(MPI_Group group)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Group_compare",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result)",
      "description": "MPI_IDENT results if the group members and group order is exactly the same in both groups. This happens for instance if group1 and group2 are the same handle. MPI_SIMILAR results if the group members are the same but the order is different. MPI_UNEQUAL results otherwise.",
      "input_parameters": [
        "group1: First group (handle).",
        "group2: Second group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "result: Integer which is MPI_IDENT if the order and members of the two groups are the same, MPI_SIMILAR if only the members are the same, and MPI_UNEQUAL otherwise."
      ]
    },
    {
      "name": "MPI_Group_difference",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)",
      "description": "The set-like operations are defined as follows: union -- All elements of the first group (group1), followed by all elements of second group (group2) that are not in the first group. intersect -- all elements of the first group that are also in the second group, ordered as in first group. difference -- all elements of the first group that are not in the second group, ordered as in the first group. Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative.",
      "input_parameters": [
        "group1: First group (handle).",
        "group2: Second group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: Difference group (handle)."
      ]
    },
    {
      "name": "MPI_Group_excl",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup)",
      "description": "The function MPI_Group_excl creates a group of processes newgroup that is obtained by deleting from group those processes with ranks ranks[0], ..., ranks[n-1]. The ordering of processes in newgroup is identical to the ordering in group. Each of the n elements of ranks must be a valid rank in group and all elements must be distinct; otherwise, the call is erroneous. If n = 0, then newgroup is identical to group.",
      "input_parameters": [
        "group: Group (handle).",
        "n: Number of elements in array ranks (integer).",
        "ranks: Array of integer ranks in group not to appear in newgroup."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: New group derived from above, preserving the order defined by group (handle)."
      ]
    },
    {
      "name": "MPI_Group_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Group MPI_Group_f2c(MPI_Fint group)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Group_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_free(MPI_Group *group)",
      "description": "This operation marks a group object for deallocation. The handle group is set to MPI_GROUP_NULL by the call. Any ongoing operation using this group will complete normally.",
      "input_parameters": [],
      "input_output_parameters": [
        "group: Group (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Group_incl",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup)",
      "description": "The function MPI_Group_incl creates a group newgroup that consists of the n processes in group with ranks ranks[0], ..., ranks[n-1]; the process with rank i in newgroup is the process with rank ranks[i] in group. Each of the n elements of ranks must be a valid rank in group and all elements must be distinct, or else the program is erroneous. If n = 0, then newgroup is MPI_GROUP_EMPTY. This function can, for instance, be used to reorder the elements of a group.",
      "input_parameters": [
        "group: Group (handle).",
        "n: Number of elements in array ranks (and size of newgroup) (integer).",
        "ranks: Ranks of processes in group to appear in newgroup (array of integers)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: New group derived from above, in the order defined by ranks (handle)."
      ]
    },
    {
      "name": "MPI_Group_intersection",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)",
      "description": "The set-like operations are defined as follows: union -- All elements of the first group (group1), followed by all elements of second group (group2) not in first. intersect -- all elements of the first group that are also in the second group, ordered as in first group. difference -- all elements of the first group that are not in the second group, ordered as in the first group. Note that for these operations the order of processes in the output group is determined primarily by order in the first group (if possible) and then, if necessary, by order in the second group. Neither union nor intersection are commutative, but both are associative.",
      "input_parameters": [
        "group1: First group (handle).",
        "group2: Second group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: Intersection group (handle)."
      ]
    },
    {
      "name": "MPI_Group_range_excl",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup)",
      "description": "Each computed rank must be a valid rank in group and all computed ranks must be distinct, or else the program is erroneous.",
      "input_parameters": [
        "group: Group (handle).",
        "n: Number of triplets in array ranges (integer).",
        "ranges: A one-dimensional array of integer triplets of the form (first rank, last rank, stride), indicating the ranks in group of processes to be excluded from the output group newgroup."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: New group derived from above, preserving the order in group (handle)."
      ]
    },
    {
      "name": "MPI_Group_range_incl",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup)",
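      "description": "If ranges consists of the triplets (first_1, last_1, stride_1), ..., (first_n, last_n, stride_n), then newgroup consists of the sequence of processes in group with ranks first_1, first_1 + stride_1, ..., first_1 + floor((last_1 - first_1) / stride_1) * stride_1, ..., first_n, first_n + stride_n, ..., first_n + floor((last_n - first_n) / stride_n) * stride_n. Each computed rank must be a valid rank in group and all computed ranks must be distinct, or else the program is erroneous.",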
      "input_parameters": [
        "group: Group (handle).",
        "n: Number of triplets in array ranges (integer).",
        "ranges: A one-dimensional array of integer triplets, of the form (first rank, last rank, stride) indicating ranks in group of processes to be included in newgroup."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: New group derived from above, in the order defined by ranges (handle)."
      ]
    },
    {
      "name": "MPI_Group_rank",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_rank(MPI_Group group, int *rank)",
      "description": "MPI_Group_rank returns as the output parameter rank the rank of the calling process in group. If the process is not a member of group then MPI_UNDEFINED is returned.",
      "input_parameters": [
        "group: Group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "rank: Rank of the calling process in group, or MPI_UNDEFINED if the process is not a member (integer)."
      ]
    },
    {
      "name": "MPI_Group_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_size(MPI_Group group, int *size)",
      "description": "MPI_Group_size returns in size the number of processes in the group. Thus, if group = MPI_GROUP_EMPTY, then the call will return size = 0. On the other hand, a call with group = MPI_GROUP_NULL is erroneous.",
      "input_parameters": [
        "group: Group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Number of processes in the group (integer)."
      ]
    },
    {
      "name": "MPI_Group_translate_ranks",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[], MPI_Group group2, int ranks2[])",
      "description": "This function is important for determining the relative numbering of the same processes in two different groups. For instance, if one knows the ranks of certain processes in the group of MPI_COMM_WORLD, one might want to know their ranks in a subset of that group.",
      "input_parameters": [
        "group1: First group (handle).",
        "n: Number of ranks in ranks1 and ranks2 arrays (integer).",
        "ranks1: Array of zero or more valid ranks in group1.",
        "group2: Second group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "ranks2: Array of corresponding ranks in group2, MPI_UNDEFINED when no correspondence exists."
      ]
    },
    {
      "name": "MPI_Group_union",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)",
      "description": "The set-like operations are defined as follows: union -- All elements of the first group (group1), followed by all elements of second group (group2) not in first. intersect -- all elements of the first group that are also in the second group, ordered as in first group. difference -- all elements of the first group that are not in the second group, ordered as in the first group.",
      "input_parameters": [
        "group1: First group (handle).",
        "group2: Second group (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newgroup: Union group (handle)."
      ]
    },
    {
      "name": "MPI_Iallgather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Allgather is similar to MPI_Gather, except that all processes receive the result, instead of just the root. In other words, all processes contribute to the result, and all processes receive the result.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvbuf: Starting address of recv buffer (choice).",
        "recvcount: Number of elements received from any process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Iallgatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Allgatherv is similar to MPI_Allgather in that all processes gather data from all other processes, except that each process can send a different amount of data. The block of data sent from the jth process is received by every process and placed in the jth block of the buffer recvbuf.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcounts: Integer array (of length group size) containing the number of elements that are received from each process.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to recvbuf) at which to place the incoming data from process i.",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Iallreduce",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)",
      "description": "Same as MPI_Reduce except that the result appears in the receive buffer of all the group members.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Datatype of elements of send buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ialltoall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Alltoall is a collective operation in which all processes send the same amount of data to each other, and receive the same amount of data from each other. The operation of this routine can be represented as follows, where each process performs 2n (n being the number of processes in communicator comm) independent point-to-point communications (including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements to send to each process (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements to receive from each process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator over which data is to be exchanged (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ialltoallv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Alltoallv is a generalized collective operation in which allprocesses send data to and receive data from all other processes. Itadds flexibility to MPI_Alltoall by allowing the user to specify datato send and receive vector-style (via a displacement and elementcount). The operation of this routine can be thought of as follows,where each process performs 2n (n being the number of processes incommunicator comm) independent point-to-point communications(including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send: to rank i.",
        "sdispls: Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype) from which to send data to rank i.",
        "sendtype: Datatype of send buffer elements.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from rank j.",
        "rdispls: Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to which data from rank j should be written.",
        "recvtype: Datatype of receive buffer elements.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ialltoallw",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Alltoallw is a generalized collective operation in which allprocesses send data to and receive data from all other processes. Itadds flexibility to MPI_Alltoallv by allowing the user to specify thedatatype of individual data blocks (in addition to displacement andelement count). Its operation can be thought of in the following way, where each process performs 2n (n being the number of processes incommunicator comm) independent point-to-point communications(including communication with itself).",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send: to rank i.",
        "sdispls: Integer array, where entry i specifies the displacement (in bytes, offset from sendbuf) from which to send data to rank i.",
        "sendtypes: Datatype array, where entry i specifies the datatype to use when: sending data to rank i.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from rank j.",
        "rdispls: Integer array, where entry j specifies the displacement (in bytes, offset from recvbuf) to which data from rank j should be written.",
        "recvtypes: Datatype array, where entry j specifies the datatype to use when receiving data from rank j.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ibarrier",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request)",
      "description": "An MPI barrier completes after all group members have entered thebarrier.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ibcast",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Bcast broadcasts a message from the process with rank root to all processes of the group, itself included. It is called by all members of group using the same arguments for comm, root. On return, the contents of root's communication buffer has been copied to all processes.",
      "input_output_parameters": [
        "buffer: Starting address of buffer (choice).",
        "count: Number of entries in buffer (integer).",
        "datatype: Data type of buffer (handle).",
        "root: Rank of broadcast root (integer).",
        "comm: Communicator (handle)."
      ],
      "output_parameters": [
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ibsend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Ibsend posts a buffered-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Data type of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Iexscan",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Exscan is used to perform an exclusive prefix reduction on datadistributed across the calling processes. The operation returns, inthe recvbuf of the process with rank i, the reduction(calculated according to the function op) of the values in thesendbufs of processes with ranks 0, ..., i-1. Compare this withthe functionality of MPI_Scan, which calculates over the range 0, ...,i (inclusive). The type of operations supported, their semantics, andthe constraints on send and receive buffers are as for MPI_Reduce.",
      "input_parameters": [
        "sendbuf: Send buffer (choice).",
        "count: Number of elements in input buffer (integer).",
        "datatype: Data type of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Igather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "Each process (root process included) sends the contents of its send buffer to the root process. The root process receives the messages and stores them in rank order. The outcome is as if each of the n processes in the group (including the root process) had executed a call to",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements for any single receive (integer, significant only at root).",
        "recvtype: Datatype of recvbuffer elements (handle, significant only at root).",
        "root: Rank of receiving process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Igatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Gatherv extends the functionality of MPI_Gather by allowing a varying count of data from each process, since recvcounts is now an array. It also allows more flexibility as to where the data is placed on the root, by providing the new argument, displs.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcounts: Integer array (of length group size) containing the number of elements that are received from each process (significant only at root).",
        "displs: Integer array (of length group size). Entry i specifies the displacement relative to recvbuf at which to place the incoming data from process i (significant only at root).",
        "recvtype: Datatype of recv buffer elements (significant only at root) (handle).",
        "root: Rank of receiving process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Improbe",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *message, MPI_Status *status)",
      "description": "Like MPI_Probe and MPI_Iprobe, the MPI_Mprobe and MPI_Improbe operationsallow incoming messages to be queried without actually receivingthem, except that MPI_Mprobe and MPI_Improbe provide a mechanism toreceive the specific message that was matched regardless of otherintervening probe or receive operations. This gives the applicationan opportunity to decide how to receive the message, based on theinformation returned by the probe. In particular, the application mayallocate memory for the receive buffer according to the length of theprobed message.",
      "input_parameters": [
        "source: Source rank or MPI_ANY_SOURCE (integer).",
        "tag: Tag value or MPI_ANY_TAG (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: Flag (logical).",
        "message: Message (handle).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Imrecv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Imrecv(void *buf, int count, MPI_Datatype type, MPI_Message *message, MPI_Request *request)",
      "description": "The functions MPI_Mrecv and MPI_Imrecv receive messages that have beenpreviously matched by a matching probe.",
      "input_parameters": [
        "count: Number of elements to receive (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "message: Message (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of receive buffer (choice).",
        "request: Request (handle)."
      ]
    },
    {
      "name": "MPI_Ineighbor_allgather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request req)",
      "description": "MPI_Neighbor_allgather is similar to MPI_Allgather, except that only the neighboring processes receive the result, instead of all processes. The neighbors and buffer layout is determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvbuf: Starting address of recv buffer (choice).",
        "recvcount: Number of elements received from any process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ineighbor_allgatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Neighbor_allgatherv is similar to MPI_Neighbor_allgather in that all processes gather data from all neighbors, except that each process can send a different amount of data. The block of data sent from the jth neighbor is received by every neighbor and placed in the jth block of the buffer. The neighbors and buffer layout is determined by the topology of comm. recvbuf.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Integer array (of length group size) containing the number of elements that are received from each neighbor.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to recvbuf) at which to place the incoming data from neighbor i.",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ineighbor_alltoall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Neighbor_alltoall is a collective operation in which all processes send and receive the same amount of data to each neighbor. The operation of this routine can be represented as follows, where each process performs 2n (n being the number of neighbors in communicator comm) independent point-to-point communications. The neighbors and buffer layout are determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements to send to each process (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements to receive from each process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator over which data is to be exchanged (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ineighbor_alltoallv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Neighbor_alltoallv is a generalized collective operation in which allprocesses send data to and receive data from all neighbors. Itadds flexibility to MPI_Neighbor_alltoall by allowing the user to specify datato send and receive vector-style (via a displacement and elementcount). The operation of this routine can be thought of as follows,where each process performs 2n (n being the number of neighbors into topology of communicator comm) independent point-to-point communications.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to neighbor i.",
        "sdispls: Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype) from which to send data to neighbor i.",
        "sendtype: Datatype of send buffer elements.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from neighbor j.",
        "rdispls: Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to which data from neighbor j should be written.",
        "recvtype: Datatype of receive buffer elements.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ineighbor_alltoallw",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Neighbor_alltoallw is a generalized collective operation in which allprocesses send data to and receive data from all neighbors. Itadds flexibility to MPI_Neighbor_alltoallv by allowing the user to specify thedatatype of individual data blocks (in addition to displacement andelement count). Its operation can be thought of in the following way, where each process performs 2n (n being the number of neighbors inthe topology of communicator comm) independent point-to-point communications.The neighbors and buffer layout are determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to neighbor i.",
        "sdispls: Integer array, where entry i specifies the displacement (in bytes, offset from sendbuf) from which to send data to neighbor i.",
        "sendtypes: Datatype array, where entry i specifies the datatype to use when sending data to neighbor i.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from neighbor j.",
        "rdispls: Integer array, where entry j specifies the displacement (in bytes, offset from recvbuf) to which data from neighbor j should be written.",
        "recvtypes: Datatype array, where entry j specifies the datatype to use when receiving data from neighbor j.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Info_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Info_c2f(MPI_Info info)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Info_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_create(MPI_Info *info)",
      "description": "MPI_Info_create creates a new info object. The newly created object contains no key/value pairs.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "info: Info object created (handle)."
      ]
    },
    {
      "name": "MPI_Info_delete",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_delete(MPI_Info info, const char *key)",
      "description": "MPI_Info_delete deletes a (key,value) pair from info. If key is not defined in info, the call raises an error of class MPI_ERR_INFO_NOKEY.",
      "input_parameters": [
        "key: Key (string)."
      ],
      "input_output_parameters": [
        "info: Info object (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Info_dup",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo)",
      "description": "MPI_Info_dup duplicates an existing info object, creating a new object, with the same (key,value) pairs and the same ordering of keys.",
      "input_parameters": [
        "info: Info object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newinfo: Info object (handle)."
      ]
    },
    {
      "name": "MPI_Info_env",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_INFO_ENV - Static MPI_Info object containing info about the application",
      "description": "The MPI-3 standard established a static MPI_Info object named MPI_INFO_ENV that can be used to access information about how the application was executed from the run-time.",
      "input_parameters": [
        "command: If available, the value will be set to argv[0]. Note that the value may not always be available - e.g., it is valid for a program to call MPI_Init with NULL parameters, in which case argv[0] will not be set if run as a singleton. This value will never be set in a Fortran program as the argv are not available.",
        "argv: The argv given for the application. If no arguments are passed to the application, then this value will not be set. It will also not be set in the case of a singleton that calls MPI_Init with NULL parameters, or a Fortran program.",
        "maxprocs: The number of processes in the job.",
        "soft: Open MPI does not support the soft option for specifying the number of processes to be executed, so this value is set to the same as maxprocs.",
        "host: The name of the host this process is executing upon - the value returned from gethostname().",
        "arch: The architecture of the host this process is executing upon. This value indicates the underlying chip architecture (e.g., x86_64), if it can be determined.",
        "wdir: The working directory at the time of process launch by mpiexec. Note that this value will not be set for processes launched as singletons as there is no reliable way for the MPI library to determine the location.",
        "file: Although specified by the MPI-3 standard, no value is currently set for this field.",
        "thread_level: The requested MPI thread level - note that this may differ from the actual MPI thread level of the application.",
        "ompi_num_apps: The number of application contexts in an MPMD job. This is an Open MPI-specific field and value.",
        "ompi_np: The number of processes in each application context, provided as a space-delimited list of integers. This is an Open MPI-specific field and value.",
        "ompi_first_rank: The MPI rank of the first process in each application context, provided as a space-delimited list of integers. This is an Open MPI-specific field and value.",
        "ompi_positioned_file_dir: If Open MPI was asked to pre-position files, this field provides the top-level directory where those files were place. This is an Open MPI-specific field and value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Info_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Info MPI_Info_f2c(MPI_Fint info)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Info_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_free(MPI_Info *info)",
      "description": "MPI_Info_free frees info and sets it to MPI_INFO_NULL.",
      "input_parameters": [],
      "input_output_parameters": [
        "info: Info object (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Info_get",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value, int *flag)",
      "description": "MPI_Info_get retrieves the value associated with key in a previous call to MPI_Info_set. If such a key exists, it sets flag to true and returns the value in value; otherwise it sets flag to false and leaves value unchanged. valuelen is the number of characters available in value. If it is less than the actual size of the value, the returned value is truncated. In C, valuelen should be one less than the amount of allocated space to allow for the null terminator.",
      "input_parameters": [
        "info: Info object (handle).",
        "key: Key (string).",
        "valuelen: Length of value arg (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "value: Value (string).",
        "flag: Returns true if key defined, false if not (boolean)."
      ]
    },
    {
      "name": "MPI_Info_get_nkeys",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_get_nkeys(MPI_Info info, int *nkeys)",
      "description": "MPI_Info_get_nkeys returns the number of currently defined keys in info.",
      "input_parameters": [
        "info: Info object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "nkeys: Number of defined keys (integer)."
      ]
    },
    {
      "name": "MPI_Info_get_nthkey",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_get_nthkey(MPI_Info info, int n, char *key)",
      "description": "MPI_Info_get_nthkey returns the nth defined key in info. Keys are numbered 0...N - 1 where N is the value returned by MPI_Info_get_nkeys. All keys between 0 and N - 1 are guaranteed to be defined. The number of a given key does not change as long as info is not modified with MPI_Info_set or MPI_Info_delete.",
      "input_parameters": [
        "info: Info object (handle).",
        "n: Key number (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "key: Key (string)."
      ]
    },
    {
      "name": "MPI_Info_get_valuelen",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, int *flag)",
      "description": "MPI_Info_get_valuelen retrieves the length of the value associated with key. If key is defined, valuelen is set to the length of its associated value and flag is set to true. If key is not defined, valuelen is not touched and flag is set to false. The length returned in C or C++ does not include the end-of-string character.",
      "input_parameters": [
        "info: Info object (handle).",
        "key: Key (string)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "valuelen: Length of value arg (integer).",
        "flag: Returns true if key defined, false if not (boolean)."
      ]
    },
    {
      "name": "MPI_Info_set",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Info_set(MPI_Info info, char *key, char *value)",
      "description": "MPI_Info_set adds the (key,value) pair to info and overrides the value if a value for the same key was previously set. The key and value parameters are null-terminated strings in C. In Fortran, leading and trailing spaces in key and value are stripped. If either key or value is larger than the allowed maximums, the error MPI_ERR_INFO_KEY or MPI_ERR_INFO_VALUE is raised, respectively.",
      "input_parameters": [
        "key: Key (string).",
        "value: Value (string)."
      ],
      "input_output_parameters": [
        "info: Info object (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Init(int *argc, char ***argv)",
      "description": "This routine, or MPI_Init_thread, must be called before most other MPIroutines are called. There are a small number of exceptions, such asMPI_Initialized and MPI_Finalized. MPI can be initialized at mostonce; subsequent calls to MPI_Init or MPI_Init_thread are erroneous.",
      "input_parameters": [
        "argc: C/C++ only: Pointer to the number of arguments.",
        "argv: C/C++ only: Argument vector."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Initialized",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Initialized(int *flag)",
      "description": "This routine may be used to determine whether MPI has beeninitialized. It is one of a small number of routines that may becalled before MPI is initialized and after MPI has been finalized(MPI_Finalized is another).",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if MPI has been initialized, and false otherwise (logical)."
      ]
    },
    {
      "name": "MPI_Init_thread",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Init_thread(int *argc, char ***argv, int required, int *provided)",
      "description": "This routine, or MPI_Init, must be called before most other MPIroutines are called. There are a small number of exceptions, such asMPI_Initialized and MPI_Finalized. MPI can be initialized at mostonce; subsequent calls to MPI_Init or MPI_Init_thread are erroneous.",
      "input_parameters": [
        "argc: C/C++ only: Pointer to the number of arguments.",
        "argv: C/C++ only: Argument vector.",
        "required: Desired level of thread support (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "provided: Available level of thread support (integer)."
      ]
    },
    {
      "name": "MPI_Intercomm_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm)",
      "description": "This call creates an intercommunicator. It is collective over the union of the local and remote groups. Processes should provide identical local_comm and local_leader arguments within each group. Wildcards are not permitted for remote_leader, local_leader, and tag.",
      "input_parameters": [
        "local_comm: The communicator containing the process that initiates the inter-communication (handle).",
        "local_leader: Rank of local group leader in local_comm (integer).",
        "peer_comm: \"Peer\" communicator; significant only at the local_leader (handle).",
        "remote_leader: Rank of remote group leader in peer_comm; significant only at the local_leader (integer).",
        "tag: Message tag used to identify new intercommunicator (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newintercomm: Created intercommunicator (handle)."
      ]
    },
    {
      "name": "MPI_Intercomm_merge",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)",
      "description": "This function creates an intracommunicator from the union of the two groups that are associated with intercomm. All processes should provide the same high value within each of the two groups. If processes in one group provide the value high = false and processes in the other group provide the value high = true, then the union orders the \"low\" group before the \"high\" group. If all processes provide the same high argument, then the order of the union is arbitrary. This call is blocking and collective within the union of the two groups.",
      "input_parameters": [
        "intercomm: Intercommunicator (type indicator).",
        "high: Used to order the groups of the two intracommunicators within comm when creating the new communicator (type indicator)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newintracomm: Created intracommunicator (type indicator)."
      ]
    },
    {
      "name": "MPI_Iprobe",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status)",
      "description": "The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe (basically, the information returned by status). In particular, the user may allocate memory for the receive buffer, according to the length of the probed message.",
      "input_parameters": [
        "source: Source rank or MPI_ANY_SOURCE (integer).",
        "tag: Tag value or MPI_ANY_TAG (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: Message-waiting flag (logical).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Irecv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion.",
      "input_parameters": [
        "buf: Initial address of receive buffer (choice).",
        "count: Number of elements in receive buffer (integer).",
        "datatype: Datatype of each receive buffer element (handle).",
        "source: Rank of source (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Ireduce",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "The global reduce functions (MPI_Reduce, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Data type of elements of send buffer (handle).",
        "op: Reduce operation (handle).",
        "root: Rank of root process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ireduce_scatter",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Reduce_scatter first does an element-wise reduction on vector of count = S(i)recvcounts[i] elements in the send buffer defined by sendbuf, count, anddatatype. Next, the resulting vector of results is split into n disjointsegments, where n is the number of processes in the group. Segment i containsrecvcounts[i] elements. The ith segment is sent to process i and stored inthe receive buffer defined by recvbuf, recvcounts[i], and datatype. USE OF IN-PLACE OPTIONWhen the communicator is an intracommunicator, you can perform a reduce-scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the sendbuf. In this case, the input data is taken from the top of the receive buffer. The area occupied by the input data may be either longer or shorter than the data filled by the output data.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "recvcounts: Integer array specifying the number of elements in result distributed to: each process. Array must be identical on all calling processes.",
        "datatype: Datatype of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Ireduce_scatter_block",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Ireduce_scatter_block first does an element-wise reduction on vector of count = n * recvcount elements in the send buffer defined by sendbuf, count, anddatatype, using the operation op, where n is the number ofprocesses in the group of comm. Next, the resulting vector of results is split into n disjointsegments, where n is the number of processes in the group. Each segments contains recvcountelements. The ith segment is sent to process i and stored in the receive buffer defined byrecvbuf, recvcount, and datatype.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "recvcount: lement count per block (non-negative integer).",
        "datatype: Datatype of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Irsend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Irsend starts a ready-mode nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or to wait for its completion.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Iscan",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Scan is used to perform an inclusive prefix reduction on datadistributed across the calling processes. The operation returns, inthe recvbuf of the process with rank i, the reduction(calculated according to the function op) of the values in thesendbufs of processes with ranks 0, ..., i (inclusive). The typeof operations supported, their semantics, and the constraints on sendand receive buffers are as for MPI_Reduce. EXAMPLEThis example uses a user-defined operation to produce a segmentedscan. A segmented scan takes, as input, a set of values and a set oflogicals, where the logicals delineate the various segments of thescan. For example,",
      "input_parameters": [
        "sendbuf: Send buffer (choice).",
        "count: Number of elements in input buffer (integer).",
        "datatype: Data type of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Iscatter",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Scatter is the inverse operation to MPI_Gather.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice, significant only at root).",
        "sendcount: Number of elements sent to each process (integer, significant only at: root).",
        "sendtype: Datatype of send buffer elements (handle, significant only at root).",
        "recvcount: Number of elements in receive buffer (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "root: Rank of sending process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Iscatterv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Scatterv is the inverse operation to MPI_Gatherv.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice, significant only at root).",
        "sendcounts: Integer array (of length group size) specifying the number of elements to send to each processor.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to sendbuf) from which to take the outgoing data to process i.",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements in receive buffer (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "root: Rank of sending process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Isend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "MPI_Isend starts a standard-mode, nonblocking send. Nonblocking calls allocate a communication request object and associate it with the request handle (the argument request). The request can be used later to query the status of the communication or wait for its completion.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Issend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Starts a synchronous mode, nonblocking send.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Is_thread_main",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Is_thread_main(int *flag)",
      "description": "MPI_Is_thread_main is called by a thread to find out whether thecaller is the main thread (that is, the thread that called MPI_Init orMPI_Init_thread).",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if calling thread is main thread (boolean)."
      ]
    },
    {
      "name": "MPI_Keyval_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Keyval_create(MPI_Copy_function *copy_fn, MPI_Delete_function *delete_fn, int *keyval, void *extra_state)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Comm_create_keyval instead.",
      "input_parameters": [
        "copy_fn: Copy callback function for keyval.",
        "delete_fn: Delete callback function for keyval.",
        "extra_state: Extra state for callback functions."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "keyval: Key value for future access (integer)."
      ]
    },
    {
      "name": "MPI_Keyval_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Keyval_free(int *keyval)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Comm_free_keyval instead.",
      "input_parameters": [
        "keyval: Frees the integer key value (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Lookup_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)",
      "description": "This function retrieves a port_name published underservice_name by a previous invocation of MPI_Publish_name. Theapplication must supply a port_name buffer large enough to holdthe largest possible port name (i.e., MPI_MAX_PORT_NAME bytes). INFO ARGUMENTSThe following keys for info are recognized:",
      "input_parameters": [
        "service_name: A service name (string).",
        "info: Options to the name service functions (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "port_name: a port name (string)."
      ]
    },
    {
      "name": "MPI_Message_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Message_c2f(MPI_Message message)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Message_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Message MPI_Message_f2c(MPI_Fint message)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Mprobe",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Mprobe(int source, int tag, MPI_Comm comm, MPI_Message *message, MPI_Status *status)",
      "description": "Like MPI_Probe and MPI_Iprobe, the MPI_Mprobe and MPI_Improbe operationsallow incoming messages to be queried without actually receivingthem, except that MPI_Mprobe and MPI_Improbe provide a mechanism toreceive the specific message that was matched regardless of otherintervening probe or receive operations. This gives the applicationan opportunity to decide how to receive the message, based on theinformation returned by the probe. In particular, the application mayallocate memory for the receive buffer according to the length of theprobed message.",
      "input_parameters": [
        "source: Source rank or MPI_ANY_SOURCE (integer).",
        "tag: Tag value or MPI_ANY_TAG (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "message: Message (handle).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Mrecv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Mrecv(void *buf, int count, MPI_Datatype type, MPI_Message *message, MPI_Status *status)",
      "description": "The functions MPI_Mrecv and MPI_Imrecv receive messages that have beenpreviously matched by a matching probe.",
      "input_parameters": [
        "count: Number of elements to receive (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "message: Message (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of receive buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Neighbor_allgather",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Neighbor_allgather is similar to MPI_Allgather, except that only the neighboring processes receive the result, instead of all processes. The neighbors and buffer layout is determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvbuf: Starting address of recv buffer (choice).",
        "recvcount: Number of elements received from any process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Neighbor_allgatherv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Neighbor_allgatherv is similar to MPI_Neighbor_allgather in that all processes gather data from all neighbors, except that each process can send a different amount of data. The block of data sent from the jth neighbor is received by every neighbor and placed in the jth block of the buffer. The neighbors and buffer layout is determined by the topology of comm. recvbuf.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements in send buffer (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Integer array (of length group size) containing the number of elements that are received from each neighbor.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to recvbuf) at which to place the incoming data from neighbor i.",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Neighbor_alltoall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Neighbor_alltoall is a collective operation in which all processes send and receive the same amount of data to each neighbor. The operation of this routine can be represented as follows, where each process performs 2n (n being the number of neighbors in communicator comm) independent point-to-point communications. The neighbors and buffer layout are determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "sendcount: Number of elements to send to each process (integer).",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements to receive from each process (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "comm: Communicator over which data is to be exchanged (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Neighbor_alltoallv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm)",
      "description": "MPI_Neighbor_alltoallv is a generalized collective operation in which allprocesses send data to and receive data from all neighbors. Itadds flexibility to MPI_Neighbor_alltoall by allowing the user to specify datato send and receive vector-style (via a displacement and elementcount). The operation of this routine can be thought of as follows, where each process performs 2n (n being the number of neighbors into topology of communicator comm) independent point-to-point communications.The neighbors and buffer layout are determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to neighbor i.",
        "sdispls: Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype) from which to send data to neighbor i.",
        "sendtype: Datatype of send buffer elements.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from neighbor j.",
        "rdispls: Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to which data from neighbor j should be written.",
        "recvtype: Datatype of receive buffer elements.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Neighbor_alltoallw",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm)",
      "description": "MPI_Neighbor_alltoallw is a generalized collective operation in which allprocesses send data to and receive data from all neighbors. Itadds flexibility to MPI_Neighbor_alltoallv by allowing the user to specify thedatatype of individual data blocks (in addition to displacement andelement count). Its operation can be thought of in the following way, where each process performs 2n (n being the number of neighbors inthe topology of communicator comm) independent point-to-point communications.The neighbors and buffer layout are determined by the topology of comm.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer.",
        "sendcounts: Integer array, where entry i specifies the number of elements to send to neighbor i.",
        "sdispls: Integer array, where entry i specifies the displacement (in bytes, offset from sendbuf) from which to send data to neighbor i.",
        "sendtypes: Datatype array, where entry i specifies the datatype to use when sending data to neighbor i.",
        "recvcounts: Integer array, where entry j specifies the number of elements to receive from neighbor j.",
        "rdispls: Integer array, where entry j specifies the displacement (in bytes, offset from recvbuf) to which data from neighbor j should be written.",
        "recvtypes: Datatype array, where entry j specifies the datatype to use when receiving data from neighbor j.",
        "comm: Communicator over which data is to be exchanged."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer.",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Open_port",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Open_port(MPI_Info info, char *port_name)",
      "description": "MPI_Open_port establishes a network address, encoded in the port_name string, at which the server will be able to accept connections from clients. port_name is supplied by the system.",
      "input_parameters": [
        "info: Options on how to establish an address (handle). No options currently supported."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "port_name: Newly established port (string)."
      ]
    },
    {
      "name": "MPI_Op_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Op_c2f(MPI_Op op)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Op_commutative",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Op_commutative(MPI_Op op, int *commute)",
      "description": "Reduction operations can be queried for their commutativity.",
      "input_parameters": [
        "op: Operation (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "commute: True if op is commutative, false otherwise (logical)."
      ]
    },
    {
      "name": "MPI_Op_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op)",
      "description": "MPI_Op_create binds a user-defined global operation to an op handle that can subsequently be used in MPI_Reduce, MPI_Allreduce, MPI_Reduce_scatter, and MPI_Scan.",
      "input_parameters": [
        "function: User-defined function (function).",
        "commute: True if commutative; false otherwise."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "op: Operation (handle)."
      ]
    },
    {
      "name": "MPI_Op_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Op MPI_Op_f2c(MPI_Fint op)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Op_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Op_free(MPI_Op *op)",
      "description": "Marks a user-defined reduction operation for deallocation and sets op to MPI_OP_NULL.",
      "input_parameters": [],
      "input_output_parameters": [
        "op: Operation (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Pack",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, int outsize, int *position, MPI_Comm comm)",
      "description": "Packs the message in the send buffer specified by inbuf, incount, datatype into the buffer space specified by outbuf and outsize.",
      "input_parameters": [
        "inbuf: Input buffer start (choice).",
        "incount: Number of input data items (integer).",
        "datatype: Datatype of each input data item (handle).",
        "outsize: Output buffer size, in bytes (integer).",
        "comm: Communicator for packed message (handle)."
      ],
      "input_output_parameters": [
        "position: Current position in buffer, in bytes (integer)."
      ],
      "output_parameters": [
        "outbuf: Output buffer start (choice)."
      ]
    },
    {
      "name": "MPI_Pack_external",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Pack_external(const char *datarep, const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, MPI_Aint outsize, MPI_Aint *position)",
      "description": "MPI_Pack_external packs data into the external32 format, a universaldata representation defined by the MPI Forum.",
      "input_parameters": [
        "datarep: Data representation (string).",
        "inbuf: Input buffer start (choice).",
        "incount: Number of input data items (integer).",
        "datatype: Datatype of each input data item (handle).",
        "outsize: Output buffer size, in bytes (integer)."
      ],
      "input_output_parameters": [
        "position: Current position in buffer, in bytes (integer)."
      ],
      "output_parameters": [
        "outbuf: Output buffer start (choice)."
      ]
    },
    {
      "name": "MPI_Pack_external_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Pack_external_size(char *datarep, int incount, MPI_Datatype datatype, MPI_Aint *size)",
      "description": "MPI_Pack_external_size allows the application to find out how muchspace is needed to pack a message in the portable format defined bythe MPI Forum.",
      "input_parameters": [
        "datarep: Data representation (string).",
        "incount: Number of input data items (integer).",
        "datatype: Datatype of each input data item (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Upper bound on size of packed message, in bytes (integer)."
      ]
    },
    {
      "name": "MPI_Pack_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, int *size)",
      "description": "MPI_Pack_size allows the application to find out how much space is needed to pack a message.",
      "input_parameters": [
        "incount: Count argument to packing call (integer).",
        "datatype: Datatype argument to packing call (handle).",
        "comm: Communicator argument to packing call (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Upper bound on size of packed message, in bytes (integer)."
      ]
    },
    {
      "name": "MPI_Pcontrol",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Pcontrol(const int level, &...)",
      "description": "MPI libraries themselves make no use of this routine; they simply return immediately to the user code. However the presence of calls to this routine allows a profiling package to be explicitly called by the user.",
      "input_parameters": [
        "level: Profiling level."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Probe",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)",
      "description": "The MPI_Probe and MPI_Iprobe operations allow checking of incoming messages, without actual receipt of them. The user can then decide how to receive them, based on the information returned by the probe in the status variable. For example, the user may allocate memory for the receive buffer, according to the length of the probed message.",
      "input_parameters": [
        "source: Source rank or MPI_ANY_SOURCE (integer).",
        "tag: Tag value or MPI_ANY_TAG (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Publish_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name)",
      "description": "This routine publishes the pair (service_name, port_name) so thatan application may retrieve port_name by calling MPI_Lookup_namewith service_name as an argument. It is an error to publish the sameservice_name twice, or to use a port_name argument that wasnot previously opened by the calling process via a call to MPI_Open_port. INFO ARGUMENTSThe following keys for info are recognized:",
      "input_parameters": [
        "service_name: A service name (string).",
        "info: Options to the name service functions (handle).",
        "port_name: A port name (string)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Put",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win)",
      "description": "MPI_Put transfers origin_count successive entries of the type specified by origin_datatype, starting at address origin_addr on the origin node to the target node specified by the win, target_rank pair. The data are written in the target buffer at address target_addr = window_base + target_disp x disp_unit, where window_base and disp_unit are the base address and window displacement unit specified at window initialization, by the target process.",
      "input_parameters": [
        "origin_addr: Initial address of origin buffer (choice).",
        "origin_count: Number of entries in origin buffer (nonnegative integer).",
        "origin_datatype: Data type of each entry in origin buffer (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "win: Window object used for communication (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Query_thread",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Query_thread(int *provided)",
      "description": "This routine returns in provided the current level of threadsupport. If MPI was initialized by a call to MPI_Init_thread,provided will have the same value as was returned by thatfunction.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "provided: C/Fortran only: Level of thread support (integer)."
      ]
    },
    {
      "name": "MPI_Raccumulate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request)",
      "description": "MPI_Raccumulate is similar to MPI_Accumulate, except that it allocates a communication request object and associates it with the request handle (the argument request) that can be used to wait or test for completion. The completion of an MPI_Raccumulate operation indicates that the origin_addr buffer is free to be updated. It does not indicate that the operation has completed at the target window.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "origin_count: Number of entries in buffer (nonnegative integer).",
        "origin_datatype: Data type of each buffer entry (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "op: Reduce operation (handle).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Recv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status)",
      "description": "This basic receive operation, MPI_Recv, is blocking: it returns only after the receive buffer contains the newly received message. A receive can complete before the matching send has completed (of course, it can complete only after the matching send has started).",
      "input_parameters": [
        "count: Maximum number of elements to receive (integer).",
        "datatype: Datatype of each receive buffer entry (handle).",
        "source: Rank of source (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "buf: Initial address of receive buffer (choice).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Recv_init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Creates a persistent communication request for a receive operation. The argument buf is marked as OUT because the user gives permission to write on the receive buffer by passing the argument to MPI_Recv_init.",
      "input_parameters": [
        "count: Maximum number of elements to receive (integer).",
        "datatype: Type of each entry (handle).",
        "source: Rank of source (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [
        "buf: Initial address of receive buffer (choice)."
      ],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Reduce",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)",
      "description": "The global reduce functions (MPI_Reduce, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice).",
        "count: Number of elements in send buffer (integer).",
        "datatype: Data type of elements of send buffer (handle).",
        "op: Reduce operation (handle).",
        "root: Rank of root process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice, significant only at root).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Reduce_local",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op)",
      "description": "The global reduce functions (MPI_Reduce_local, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_local_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation.",
      "input_parameters": [
        "inbuf: Address of input buffer (choice).",
        "count: Number of elements in input buffer (integer).",
        "datatype: Data type of elements of input buffer (handle).",
        "op: Reduce operation (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "inoutbuf: Address of in/out buffer (choice)."
      ]
    },
    {
      "name": "MPI_Reduce_scatter",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)",
      "description": "MPI_Reduce_scatter first does an element-wise reduction on vector of count = S(i)recvcounts[i] elements in the send buffer defined by sendbuf, count, anddatatype. Next, the resulting vector of results is split into n disjointsegments, where n is the number of processes in the group. Segment i containsrecvcounts[i] elements. The ith segment is sent to process i and stored inthe receive buffer defined by recvbuf, recvcounts[i], and datatype. USE OF IN-PLACE OPTIONWhen the communicator is an intracommunicator, you can perform a reduce-scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the sendbuf. In this case, the input data is taken from the top of the receive buffer. The area occupied by the input data may be either longer or shorter than the data filled by the output data.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "recvcounts: Integer array specifying the number of elements in result distributed to each process. Array must be identical on all calling processes.",
        "datatype: Datatype of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Reduce_scatter_block",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)",
      "description": "MPI_Reduce_scatter_block first does an element-wise reduction on vector of count = n * recvcount elements in the send buffer defined by sendbuf, count, anddatatype, using the operation op, where n is the number ofprocesses in the group of comm. Next, the resulting vector of results is split into n disjointsegments, where n is the number of processes in the group. Each segments contains recvcountelements. The ith segment is sent to process i and stored in the receive buffer defined byrecvbuf, recvcount, and datatype. USE OF IN-PLACE OPTIONWhen the communicator is an intracommunicator, you can perform a reduce-scatter operation in-place (the output buffer is used as the input buffer). Use the variable MPI_IN_PLACE as the value of the sendbuf. In this case, the input data is taken from the top of the receive buffer. The area occupied by the input data may be either longer or shorter than the data filled by the output data.",
      "input_parameters": [
        "sendbuf: Starting address of send buffer (choice).",
        "recvcount: lement count per block (non-negative integer).",
        "datatype: Datatype of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Starting address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Register_datarep",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Register_datarep(const char *datarep, MPI_Datarep_conversion_function *read_conversion_fn, MPI_Datarep_conversion_function *write_conversion_fn, MPI_Datarep_extent_function *dtype_file_extent_fn, void *extra_state)",
      "description": "MPI_Register_datarep defines a data representation. It associates the data representation's identifier (a string) with the functions that convert from file representation to the native representation and vice versa, with the function that gets the extent of a data type as represented in the file, as well as with \"extra state,\" which is used for passing arguments. Once a data representation has been registered using this routine, you may specify its identifier as an argument to MPI_File_set_view, causing subsequent data-access operations to call the specified conversion functions.",
      "input_parameters": [
        "datarep: Data representation identifier (string).",
        "read_conversion_fn: Function invoked to convert from file representation to native representation (function).",
        "write_conversion_fn: Function invoked to convert from native representation to file representation (function).",
        "dtype_file_extent_fn: Function invoked to get the extent of a data type as represented in the file (function).",
        "extra_state: Extra state."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Request_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Request_c2f(MPI_Request request)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Request_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Request MPI_Request_f2c(MPI_Fint request)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Request_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Request_free(MPI_Request *request)",
      "description": "This operation allows a request object to be deallocated without waiting for the associated communication to complete.",
      "input_parameters": [],
      "input_output_parameters": [
        "request: Communication request (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Request_get_status",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Request_get_status(MPI_Request request, int *flag, MPI_Status *status)",
      "description": "MPI_Request_get_status sets flag=true if the operation is complete or sets flag=false if it is not complete. If the operation is complete, it returns in status the request status. It does not deallocate or inactivate the request; a subsequent call to test, wait, or free should be executed with that request.",
      "input_parameters": [
        "request: Communication request (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: Boolean flag, same as from MPI_Test (logical).",
        "status: MPI_Status object if flag is true (status)."
      ]
    },
    {
      "name": "MPI_Rget",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request)",
      "description": "MPI_Rget is similar to MPI_Get, except that it allocates a communication request object and associates it with the request handle (the argument request) that can be used to wait or test for completion. The completion of an MPI_Rget operation indicates that the data is available in the origin buffer. If origin_addr points to memory attached to a window, then the data becomes available in the private copy of this window.",
      "input_parameters": [
        "origin_addr: Initial address of origin buffer (choice).",
        "origin_count: Number of entries in origin buffer (nonnegative integer).",
        "origin_datatype: Data type of each entry in origin buffer (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from window start to the beginning of the target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: datatype of each entry in target buffer (handle)",
        "win: window object used for communication (handle)"
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Rget_accumulate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request)",
      "description": "MPI_Rget_accumulate is similar to MPI_Get_accumulate, except that it allocates a communication request object and associates it with the request handle (the argument request) that can be used to wait or test for completion. The completion of an MPI_Rget_accumulate operation indicates that the data is available in the result buffer and the origin buffer is free to be updated. It does not indicate that the operation has been completed at the target window.",
      "input_parameters": [
        "origin_addr: Initial address of buffer (choice).",
        "origin_count: Number of entries in buffer (nonnegative integer).",
        "origin_datatype: Data type of each buffer entry (handle).",
        "result_addr: Initial address of result buffer (choice).",
        "result_count: Number of entries in result buffer (nonnegative integer).",
        "result_datatype: Data type of each result buffer entry (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to beginning of target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "op: Reduce operation (handle).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Rput",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request)",
      "description": "MPI_Rput is similar to MPI_Put, except that it allocates a communication request object and associates it with the request handle (the argument request). The completion of an MPI_Rput operation (i.e., after the corresponding test or wait) indicates that the sender is now free to update the locations in the origin_addr buffer. It does not indicate that the data is available at the target window. If remote completion is required, MPI_Win_flush, MPI_Win_flush_all, MPI_Win_unlock, or MPI_Win_unlock_all can be used.",
      "input_parameters": [
        "origin_addr: Initial address of origin buffer (choice).",
        "origin_count: Number of entries in origin buffer (nonnegative integer).",
        "origin_datatype: Data type of each entry in origin buffer (handle).",
        "target_rank: Rank of target (nonnegative integer).",
        "target_disp: Displacement from start of window to target buffer (nonnegative integer).",
        "target_count: Number of entries in target buffer (nonnegative integer).",
        "target_datatype: Data type of each entry in target buffer (handle).",
        "win: Window object used for communication (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Rsend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Rsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)",
      "description": "A ready send may only be called if the user can guarantee that a receive isalready posted. It is an error if the receive is not posted before theready send is called.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Rsend_init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Creates a persistent communication object for a ready mode send operation, and binds to it all the arguments of a send operation.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements sent (integer).",
        "datatype: Type of each element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Scan",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)",
      "description": "MPI_Scan is used to perform an inclusive prefix reduction on datadistributed across the calling processes. The operation returns, inthe recvbuf of the process with rank i, the reduction(calculated according to the function op) of the values in thesendbufs of processes with ranks 0, ..., i (inclusive). The typeof operations supported, their semantics, and the constraints on sendand receive buffers are as for MPI_Reduce. EXAMPLEThis example uses a user-defined operation to produce a segmentedscan. A segmented scan takes, as input, a set of values and a set oflogicals, where the logicals delineate the various segments of thescan. For example,",
      "input_parameters": [
        "sendbuf: Send buffer (choice).",
        "count: Number of elements in input buffer (integer).",
        "datatype: Data type of elements of input buffer (handle).",
        "op: Operation (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Scatter",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)",
      "description": "MPI_Scatter is the inverse operation to MPI_Gather.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice, significant only at root).",
        "sendcount: Number of elements sent to each process (integer, significant only at root).",
        "sendtype: Datatype of send buffer elements (handle, significant only at root).",
        "recvcount: Number of elements in receive buffer (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "root: Rank of sending process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Scatterv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Scatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)",
      "description": "MPI_Scatterv is the inverse operation to MPI_Gatherv.",
      "input_parameters": [
        "sendbuf: Address of send buffer (choice, significant only at root).",
        "sendcounts: Integer array (of length group size) specifying the number of elements to send to each processor.",
        "displs: Integer array (of length group size). Entry i specifies the displacement (relative to sendbuf) from which to take the outgoing data to process i.",
        "sendtype: Datatype of send buffer elements (handle).",
        "recvcount: Number of elements in receive buffer (integer).",
        "recvtype: Datatype of receive buffer elements (handle).",
        "root: Rank of sending process (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Address of receive buffer (choice).",
        "request: Request (handle, non-blocking only)."
      ]
    },
    {
      "name": "MPI_Send",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)",
      "description": "MPI_Send performs a standard-mode, blocking send.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements send (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Sendrecv",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)",
      "description": "The send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. The two (source and destination) are possibly the same. A send-receive operation is useful for executing a shift operation across a chain of processes. If blocking sends and receives are used for such a shift, then one needs to order the sends and receives correctly (for example, even processes send, then receive; odd processes receive first, then send) in order to prevent cyclic dependencies that may lead to deadlock. When a send-receive operation is used, the communication subsystem takes care of these issues. The send-receive operation can be used in conjunction with the functions described in Chapter 6 of the MPI-1 Standard, \"Process Topologies,\" in order to perform shifts on various logical topologies. Also, a send-receive operation is useful for implementing remote procedure calls.",
      "input_parameters": [
        "sendbuf: Initial address of send buffer (choice).",
        "sendcount: Number of elements to send (integer).",
        "sendtype: Type of elements in send buffer (handle).",
        "dest: Rank of destination (integer).",
        "sendtag: Send tag (integer).",
        "recvcount: Maximum number of elements to receive (integer).",
        "recvtype: Type of elements in receive buffer (handle).",
        "source: Rank of source (integer).",
        "recvtag: Receive tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "recvbuf: Initial address of receive buffer (choice).",
        "status: Status object (status). This refers to the receive operation."
      ]
    },
    {
      "name": "MPI_Sendrecv_replace",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status *status)",
      "description": "The send-receive operations combine in one call the sending of a message to one destination and the receiving of another message, from another process. The two (source and destination) are possibly the same. A send-receive operation is useful for executing a shift operation across a chain of processes. If blocking sends and receives are used for such a shift, then one needs to order the sends and receives correctly (for example, even processes send, then receive; odd processes receive first, then send) in order to prevent cyclic dependencies that may lead to deadlock. When a send-receive operation is used, the communication subsystem takes care of these issues. The send-receive operation can be used in conjunction with the functions described in Chapter 6 of the MPI Standard, \"Process Topologies,\" in order to perform shifts on various logical topologies. Also, a send-receive operation is useful for implementing remote procedure calls.",
      "input_parameters": [
        "count: Number of elements in send and receive buffer (integer).",
        "datatype: Type of elements to send and receive (handle).",
        "dest: Rank of destination (integer).",
        "sendtag: Send message tag (integer).",
        "source: Rank of source (integer).",
        "recvtag: Receive message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [
        "buf: Initial address of send and receive buffer (choice)."
      ],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Send_init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Creates a persistent communication request for a standard mode send operation, and binds to it all the arguments of a send operation.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements to send (integer).",
        "datatype: Type of each element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Sizeof",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Sizeof - Returns the size, in bytes, of the given type",
      "description": "MPI_SIZEOF returns the size (in bytes) of the machine representationof the given variable. It is a generic Fortran type and has a Fortranbinding only. This routine is similar to the sizeof builtin inC/C++. However, if given an array argument, it returns the size of thebase element, not the size of the whole array.",
      "input_parameters": [
        "X: A Fortran variable of numeric intrinsic type (choice)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "SIZE: Size of machine representation of that type (integer).",
        "IERROR: Error status (integer)."
      ]
    },
    {
      "name": "MPI_Ssend",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)",
      "description": "MPI_Ssend performs a synchronous-mode, blocking send. See the MPI-1 Standard for more detailed information about such sends.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements in send buffer (nonnegative integer).",
        "datatype: Datatype of each send buffer element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Ssend_init",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)",
      "description": "Creates a persistent communication object for a synchronous mode send operation, and binds to it all the arguments of a send operation.",
      "input_parameters": [
        "buf: Initial address of send buffer (choice).",
        "count: Number of elements to send (integer).",
        "datatype: Type of each element (handle).",
        "dest: Rank of destination (integer).",
        "tag: Message tag (integer).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "request: Communication request (handle)."
      ]
    },
    {
      "name": "MPI_Start",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Start(MPI_Request *request)",
      "description": "A communication (send or receive) that uses a persistent request is initiated by the function MPI_Start.",
      "input_parameters": [
        "request: Communication request (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Startall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Startall(int count, MPI_Request array_of_requests[])",
      "description": "Starts all communications associated with requests in array_of_requests. A call to MPI_Startall(count, array_of_requests) has the same effect as calls to MPI_Start (&array_of_requests[i]), executed for i=0 ,..., count-1, in some arbitrary order.",
      "input_parameters": [
        "count: List length (integer)."
      ],
      "input_output_parameters": [
        "array_of_requests: Array of requests (array of handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Status_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Status_c2f(const MPI_Status *c_status, MPI_Fint *f_status)",
      "description": "These two procedures are provided in C to convert from a Fortran status (which is an array of integers) to a C status (which is a structure), and vice versa. The conversion occurs on all the information in status, including that which is hidden. That is, no status information is lost in the conversion.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Status_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Status_f2c(const MPI_Fint *f_status, MPI_Status *c_status)",
      "description": "These two procedures are provided in C to convert from a Fortran status (which is an array of integers) to a C status (which is a structure), and vice versa. The conversion occurs on all the information in status, including that which is hidden. That is, no status information is lost in the conversion.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Status_set_cancelled",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Status_set_cancelled(MPI_Status *status, int flag)",
      "description": "If flag is set to true, then a subsequent call to MPI_Test_cancelled(status, flag) will also return flag = true; otherwise it will return false.",
      "input_parameters": [
        "flag: If true, indicates request was canceled (logical)."
      ],
      "input_output_parameters": [
        "status: Status with which to associate cancel flag (status)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Status_set_elements",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype, int count)",
      "description": "MPI_Status_set_elements modifies the opaque part of status so that a call to MPI_Get_elements or MPI_Get_elements_x will return count. MPI_Get_count will return a compatible value.",
      "input_parameters": [
        "datatype: Data type associated with count (handle).",
        "count: Number of elements to associate with status (integer)."
      ],
      "input_output_parameters": [
        "status: Status to associate with count (status)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Status_set_elements_x",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count count)",
      "description": "MPI_Status_set_elements modifies the opaque part of status so that a call to MPI_Get_elements or MPI_Get_elements_x will return count. MPI_Get_count will return a compatible value.",
      "input_parameters": [
        "datatype: Data type associated with count (handle).",
        "count: Number of elements to associate with status (integer)."
      ],
      "input_output_parameters": [
        "status: Status to associate with count (status)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Test",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status)",
      "description": "A call to MPI_Test returns flag = true if the operation identified by request is complete. In such a case, the status object is set to contain information on the completed operation; if the communication object was created by a nonblocking send or receive, then it is deallocated and the request handle is set to MPI_REQUEST_NULL. The call returns flag = false, otherwise. In this case, the value of the status object is undefined. MPI_Test is a local operation.",
      "input_parameters": [
        "request: Communication request (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if operation completed (logical).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Testall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag, MPI_Status array_of_statuses[])",
      "description": "Returns flag = true if all communications associated with active handles in the array have completed (this includes the case where no handle in the list is active). In this case, each status entry that corresponds to an active handle request is set to the status of the corresponding communication; if the request was allocated by a nonblocking communication call then it is deallocated, and the handle is set to MPI_REQUEST_NULL. Each status entry that corresponds to a null or inactive handle is set to empty.",
      "input_parameters": [
        "count: Lists length (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if previously initiated communications are complete (logical.)",
        "array_of_statuses: Array of status objects (array of status)."
      ]
    },
    {
      "name": "MPI_Testany",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status)",
      "description": "MPI_Testany tests for completion of either one or none of the operations associated with active handles. In the former case, it returns flag = true, returns in index the index of this request in the array, and returns in status the status of that operation; if the request was allocated by a nonblocking communication call then the request is deallocated and the handle is set to MPI_REQUEST_NULL. (The array is indexed from 0 in C, and from 1 in Fortran.) In the latter case (no operation completed), it returns flag = false, returns a value of MPI_UNDEFINED in index, and status is undefined.",
      "input_parameters": [
        "count: List length (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "index: Index of operation that completed, or MPI_UNDEFINED if none completed (integer).",
        "flag: True if one of the operations is complete (logical).",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Testsome",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[])",
      "description": "Behaves like MPI_Waitsome, except that it returns immediately.",
      "input_parameters": [
        "incount: Length of array_of_requests (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "outcount: Number of completed requests (integer).",
        "array_of_indices: Array of indices of operations that completed (array of integers).",
        "array_of_statuses: Array of status objects for operations that completed (array of status)."
      ]
    },
    {
      "name": "MPI_Test_cancelled",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Test_cancelled(const MPI_Status *status, int *flag)",
      "description": "Returns flag = true if the communication associated with the status objectwas canceled successfully. In such a case, all other fields of status (such as count or tag) are undefined. Otherwise, returns flag = false. If a receive operation might be canceled, one should call MPI_Test_cancelled first, to check whether the operation was canceled, before checking on the other fields of the return status.",
      "input_parameters": [
        "status: Status object (status)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: True if operation was cancelled (logical)."
      ]
    },
    {
      "name": "MPI_Topo_test",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Topo_test(MPI_Comm comm, int *top_type)",
      "description": "The function MPI_Topo_test returns the type of topology that is assigned to a communicator.",
      "input_parameters": [
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "top_type: Topology type of communicator comm (choice)."
      ]
    },
    {
      "name": "MPI_Type_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Type_c2f(MPI_Datatype datatype)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_commit",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_commit(MPI_Datatype *datatype)",
      "description": "The commit operation commits the data type. A data type is the formal description of a communication buffer, not the content of that buffer. After a data type has been committed, it can be repeatedly reused to communicate the changing content of a buffer or, indeed, the content of different buffers, with different starting addresses.",
      "input_parameters": [
        "datatype: Data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_contiguous",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "The simplest datatype constructor is MPI_Type_contiguous, which allows replication of a datatype into contiguous locations.",
      "input_parameters": [
        "count: Replication count (nonnegative integer).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_darray",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_darray(int size, int rank, int ndims, const int array_of_gsizes[], const int array_of_distribs[], const int array_of_dargs[], const int array_of_psizes[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "MPI_Type_create_darray can be used to generate the data types corresponding to the distribution of an ndims-dimensional array of oldtype elements onto an ndims-dimensional grid of logical processes. Unused dimensions of array_of_psizes should be set to 1. For a call to MPI_Type_create_darray to be correct, the equation",
      "input_parameters": [
        "size: Size of process group (positive integer).",
        "rank: Rank in process group (nonnegative integer).",
        "ndims: Number of array dimensions as well as process grid dimensions (positive integer).",
        ": array_of_gsizes: Number of elements of type oldtype in each dimension of global array (array of positive integers).",
        ": array_of_distribs: Distribution of array in each dimension (array of state).",
        "array_of_dargs: Distribution argument in each dimension (array of positive integers).",
        ": array_of_psizes: Size of process grid in each dimension (array of positive integers).",
        ": order: Array storage order flag (state).",
        "oldtype: Old data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_f90_complex",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype)",
      "description": "This function provides a way to declare KIND-parameterized COMPLEX MPIdatatypes. The arguments are interpreted in a similar fashion to theF90 function SELECTED_REAL_KIND. The parameters p and rmust be scalar integers. The argument p represents the requiredlevel of numerical precision, in decimal digits. The r parameterindicates the range of exponents desired: the returned datatype willhave at least one exponent between +r and -r (inclusive).",
      "input_parameters": [
        "p: Precision, in decimal digits (integer).",
        "r: Decimal exponent range (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_f90_integer",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype)",
      "description": "This function provides a way to declare KIND-parameterized INTEGER MPIdatatypes. The argument is interpreted in a similar fashion to the F90function SELECTED_INT_KIND: r must be a scalar integer, andrepresents the desired level of numerical precision, in decimaldigits.",
      "input_parameters": [
        "r: Precision, in decimal digits (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_f90_real",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype)",
      "description": "This function provides a way to declare KIND-parameterized REAL MPIdatatypes. The arguments are interpreted in a similar fashion to theF90 function SELECTED_REAL_KIND. The parameters p and rmust be scalar integers. The argument p represents the requiredlevel of numerical precision, in decimal digits. The r parameterindicates the range of exponents desired: the returned datatype willhave at least one exponent between +r and -r (inclusive).",
      "input_parameters": [
        "p: Precision, in decimal digits (integer).",
        "r: Decimal exponent range (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_hindexed",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_hindexed(int count, const int array_of_blocklengths[], const MPI_Aint array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "The function MPI_Type_create_hindexed is identical to MPI_Type_indexed, except that block displacements in array_of_displacements are specified in bytes, rather than in multiples of the oldtype extent.",
      "input_parameters": [
        "count: Number of blocks -- also number of entries in array_of_displacements and: array_of_blocklengths (nonnegative integer).",
        "array_of_blocklengths: Number of elements per block (array of nonnegative integers).",
        "array_of_displacements: Displacement for each block, in multiples of oldtype extent for MPI_Type_indexed and bytes for MPI_Type_create_hindexed (array of integer for MPI_TYPE_INDEXED, array of MPI_Aint for MPI_TYPE_CREATE_HINDEXED ).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_hindexed_block",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_hindexed_block(int count, int blocklength, const MPI_Aint array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "MPI_Type_create_indexed_block and MPI_Type_create_hindexed_block create an indexed data type with the same block length for all blocks. The only difference between the two functions is MPI_Type_create_indexed_block takes an array of displacements in units of the extent of oldtype while MPI_Type_create_hindexed_block takes displacements in bytes.",
      "input_parameters": [
        "count: Length of array of displacements (integer).",
        "blocklength: Size of block (integer).",
        "array_of_displacements: Array of displacements (array of integers). In units of the extent of oldtype for MPI_Type_create_indexed_block and bytes for MPI_Type_create_hindexed_block.",
        "oldtype: Old data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_hvector",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "MPI_Type_create_hvector creates a vector (strided) data type with offset in bytes.",
      "input_parameters": [
        "count: Number of blocks (nonnegative integer).",
        "blocklength: Number of elements in each block (nonnegative integer).",
        "stride: Number of bytes between start of each block (integer).",
        "oldtype: Old data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_indexed_block",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_indexed_block(int count, int blocklength, const int array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "MPI_Type_create_indexed_block and MPI_Type_create_hindexed_block create an indexed data type with the same block length for all blocks. The only difference between the two functions is MPI_Type_create_indexed_block takes an array of displacements in units of the extent of oldtype while MPI_Type_create_hindexed_block takes displacements in bytes.",
      "input_parameters": [
        "count: Length of array of displacements (integer).",
        "blocklength: Size of block (integer).",
        "array_of_displacements: Array of displacements (array of integers). In units of the extent of oldtype for MPI_Type_create_indexed_block and bytes for MPI_Type_create_hindexed_block.",
        "oldtype: Old data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn, MPI_Type_delete_attr_function *type_delete_attr_fn, int *type_keyval, void *extra_state)",
      "description": "MPI_Type_create_keyval generates a new attribute key for caching on data types. This routine partially replaces MPI_Keyval_create.",
      "input_parameters": [
        "type_copy_attr_fn: Copy callback function for type_keyval (function).",
        "type_delete_attr_fn: Delete callback function for type_keyval (function).",
        "extra_state: Extra state for callback functions."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "type_keyval: Key value for future access (integer)."
      ]
    },
    {
      "name": "MPI_Type_create_resized",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype)",
      "description": "MPI_Type_create_resized returns in newtype a handle to a new data type that is identical to oldtype, except that the lower bound of this new data type is set to be lb, and its upper bound is set to be lb + extent. Any previous lb and ub markers are erased, and a new pair of lower bound and upper bound markers are put in the positions indicated by the lb and extent arguments. This affects the behavior of the data type when used in communication operations, with count > 1, and when used in the construction of new derived data types.",
      "input_parameters": [
        "oldtype: Input data type (handle).",
        "lb: New lower bound of data type (integer).",
        "extent: New extent of data type (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: Output data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_struct",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_struct(int count, int array_of_blocklengths[], const MPI_Aint array_of_displacements[], const MPI_Datatype array_of_types[], MPI_Datatype *newtype)",
      "description": "MPI_Type_create_struct creates a structured data type. This routine replaces MPI_Type_struct, which is now deprecated.",
      "input_parameters": [
        "count: Number of blocks (integer) -- also number of entries in arrays array_of_types, array_of_displacements, and array_of_blocklengths.",
        "array_of_blocklengths: Number of elements in each block (array of integers).",
        "array_of_displacements: Byte displacement of each block (array of integers).",
        "array_of_types: Type of elements in each block (array of handles to data-type objects)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_create_subarray",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_create_subarray(int ndims, const int array_of_sizes[], const int array_of_subsizes[], const int array_of_starts[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "The subarray type constructor creates an MPI data type describing an n-dimensional subarray of an n-dimensional array. The subarray may be situated anywhere within the full array, and may be of any nonzero size up to the size of the larger array as long as it is confined within this array. This type constructor facilitates creating file types to access arrays distributed in blocks among processes to a single file that contains the global array.",
      "input_parameters": [
        "ndims: Number of array dimensions (positive integer).",
        "array_of_sizes: Number of elements of type oldtype in each dimension of the full array (array of positive integers).",
        "array_of_subsizes: Number of elements of type oldtype in each dimension of the subarray (array of positive integers).",
        "array_of_starts: Starting coordinates of the subarray in each dimension (array of nonnegative integers).",
        "order: Array storage order flag (state).",
        "oldtype: Array element data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New data type (handle)."
      ]
    },
    {
      "name": "MPI_Type_delete_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_delete_attr(MPI_Datatype type, int type_keyval)",
      "description": "MPI_Type_delete_attr deletes a datatype-caching attribute value associated with a key. This routine partially replaces MPI_Attr_delete, which is now deprecated.",
      "input_parameters": [
        "type_keyval: Key value (integer)."
      ],
      "input_output_parameters": [
        "type: Data type from which the attribute is deleted (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_dup",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_dup(MPI_Datatype type, MPI_Datatype *newtype)",
      "description": "MPI_Type_dup is a type constructor that duplicates the existing type with associated key values. For each key value, the respective copy callback function determines the attribute value associated with this key in the new communicator. One particular action that a copy callback may take is to delete the attribute from the new data type. Returns in newtype a new data type with exactly the same properties as type, as well as any copied cached information. The new data type has identical upper bound and lower bound and yields the same net result when fully decoded with the functions described in Section 8.6 of the MPI-2 standard. newtype has the same committed state as the old type.",
      "input_parameters": [
        "type: Data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: Copy of type (handle)."
      ]
    },
    {
      "name": "MPI_Type_extent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_extent(MPI_Datatype datatype, MPI_Aint *extent)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Type_get_extent instead.",
      "input_parameters": [
        "datatype: Datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "extent: Datatype extent (integer)."
      ]
    },
    {
      "name": "MPI_Type_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Datatype MPI_Type_f2c(MPI_Fint datatype)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_free(MPI_Datatype *datatype)",
      "description": "Marks the datatype object associated with datatype for de-allocation and sets datatype to MPI_DATATYPE_NULL. Any communication that is currently using this datatype will complete normally. Derived datatypes that were defined from the freed datatype are not affected.",
      "input_parameters": [],
      "input_output_parameters": [
        "datatype: Datatype that is freed (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_free_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_free_keyval(int *type_keyval)",
      "description": "MPI_Type_free_keyval frees a data-type attribute key value.",
      "input_parameters": [],
      "input_output_parameters": [
        "type_keyval: Key value to free (integer)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_get_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_attr(MPI_Datatype type, int type_keyval, void *attribute_val, int *flag)",
      "description": "For the given data type, MPI_Type_get_attr returns an attribute value that corresponds to the specified key value.",
      "input_parameters": [
        "type: Data type to which the attribute is attached (handle).",
        "type_keyval: Key value (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "attribute_val: Attribute value, unless flag = false.",
        "flag: \"false\" if no attribute is associated with the key (logical)."
      ]
    },
    {
      "name": "MPI_Type_get_contents",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_contents(MPI_Datatype datatype, int max_integers, int max_addresses, int max_datatypes, int array_of_integers[], MPI_Aint array_of_addresses[], MPI_Datatype array_of_datatypes[])",
      "description": "For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating datatype. The parameter datatype must be a predefined unnamed or a derived data type. The call is erroneous if datatype is a predefined named data type.",
      "input_parameters": [
        "datatype: Data type to access (handle).",
        "max_integers: Number of elements in array_of_integers (nonnegative integer).",
        "max_addresses: Number of elements in array_of_addresses (nonnegative integer).",
        "max_datatypes: Number of elements in array_of_datatypes (nonnegative integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "array_of_integers: Contains integer arguments used in constructing datatype (array of integers).",
        "array_of_addresses: Contains address arguments used in constructing datatype (array of integers).",
        "array_of_datatypes: Contains data-type arguments used in constructing datatype (array of handles)."
      ]
    },
    {
      "name": "MPI_Type_get_envelope",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_envelope(MPI_Datatype datatype, int *num_integers, int *num_addresses, int *num_datatypes, int *combiner)",
      "description": "For the given data type, MPI_Type_get_envelope returns information on the number and type of input arguments used in the call that created the data type. The number-of-arguments values returned can be used to provide sufficiently large arrays in the decoding routine MPI_Type_get_contents. This call and the meaning of the returned values is described below. The combiner reflects the MPI data type constructor call that was used in creating datatype.",
      "input_parameters": [
        "datatype: Data type to access (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "num_integers: Number of input integers used in the call constructing combiner (nonnegative integer).",
        "num_addresses: Number of input addresses used in the call constructing combiner (nonnegative integer).",
        "num_datatypes: Number of input data types used in the call constructing combiner (nonnegative integer).",
        "combiner: Combiner (state)."
      ]
    },
    {
      "name": "MPI_Type_get_extent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_extent(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent)",
      "description": "MPI_Type_get_extent returns the lower bound and the extent of datatype. For either function, if either the lb or extent parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "datatype: Data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "lb: Lower bound of data type (integer).",
        "extent: Data type extent (integer)."
      ]
    },
    {
      "name": "MPI_Type_get_extent_x",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_extent_x(MPI_Datatype datatype, MPI_Count *lb, MPI_Count *extent)",
      "description": "MPI_Type_get_extent_x returns the lower bound and the extent of datatype. For either function, if either the lb or extent parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "datatype: Data type (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "lb: Lower bound of data type (integer).",
        "extent: Data type extent (integer)."
      ]
    },
    {
      "name": "MPI_Type_get_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_name(MPI_Datatype type, char *type_name, int *resultlen)",
      "description": "MPI_Type_get_name returns the printable identifier associated with an MPI data type.",
      "input_parameters": [
        "type: Data type whose name is to be returned (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "type_name: The name previously stored on the data type, or an empty string if no such name exists (string).",
        "resultlen: Length of returned name (integer)."
      ]
    },
    {
      "name": "MPI_Type_get_true_extent",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent)",
      "description": "The true_lb parameter returns the offset of the lowest unit of store that is addressed by the data type, that is, the lower bound of the corresponding typemap, ignoring MPI_LB markers. The true_extent parameter returns the true size of the data type, that is, the extent of the corresponding typemap, ignoring MPI_LB and MPI_UB markers, and performing no rounding for alignment. For both functions, if either the true_lb or true_extent parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "datatype: Data type for which information is wanted (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "true_lb: True lower bound of data type (integer).",
        "true_extent: True size of data type (integer)."
      ]
    },
    {
      "name": "MPI_Type_get_true_extent_x",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent)",
      "description": "Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error.",
      "input_parameters": [
        "datatype: Data type for which information is wanted (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "true_lb: True lower bound of data type (integer).",
        "true_extent: True size of data type (integer)."
      ]
    },
    {
      "name": "MPI_Type_hindexed",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_hindexed(int count, int *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Use MPI_Type_create_hindexed instead.",
      "input_parameters": [
        "count: Number of blocks -- also number of entries in array_of_displacements and array_of_blocklengths (integer).",
        "array_of_blocklengths: Number of elements in each block (array of nonnegative integers).",
        "array_of_displacements: Byte displacement of each block (C: array of MPI_Aint, Fortran: array of integer).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_hvector",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Use MPI_Type_create_hvector instead.",
      "input_parameters": [
        "count: Number of blocks (nonnegative integer).",
        "blocklength: Number of elements in each block (nonnegative integer).",
        "stride: Number of bytes between start of each block (integer).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_indexed",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_indexed(int count, const int array_of_blocklengths[], const int array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "The function MPI_Type_indexed allows replication of an old datatype into a sequence of blocks (each block is a concatenation of the old datatype), where each block can contain a different number of copies and have a different displacement. All block displacements are multiples of the old data type's extent.",
      "input_parameters": [
        "count: Number of blocks -- also number of entries in array_of_displacements and array_of_blocklengths (nonnegative integer).",
        "array_of_blocklengths: Number of elements per block (array of nonnegative integers).",
        "array_of_displacements: Displacement for each block, in multiples of oldtype extent for MPI_Type_indexed and bytes for MPI_Type_create_hindexed (array of integer for MPI_TYPE_INDEXED, array of MPI_Aint for MPI_TYPE_CREATE_HINDEXED).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_lb",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_lb(MPI_Datatype datatype, MPI_Aint *displacement)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Type_get_extent instead.",
      "input_parameters": [
        "datatype: Datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "displacement: Displacement of lower bound from origin, in bytes (integer)."
      ]
    },
    {
      "name": "MPI_Type_match_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type)",
      "description": "The function returns an MPI datatype matching a local variable of type (typeclass, size). The returned type is a reference (handle) to a predefined named datatype, not a duplicate. This type cannot be freed.",
      "input_parameters": [
        "typeclass: Generic type specifier (integer).",
        "size: Size, in bytes, of representation (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "type: Datatype with correct type and size (handle)."
      ]
    },
    {
      "name": "MPI_Type_set_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_set_attr(MPI_Datatype type, int type_keyval, void *attribute_val)",
      "description": "For the given data type, MPI_Type_set_attr sets the key value to the value of the specified attribute.",
      "input_parameters": [
        "type_keyval: Key value (integer).",
        "attribute_val: Attribute value."
      ],
      "input_output_parameters": [
        "type: Data type to which attribute will be attached (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_set_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_set_name(MPI_Datatype type, const char *type_name)",
      "description": "MPI_Type_set_name associates a printable identifier with an MPI data type.",
      "input_parameters": [
        "type_name: The character string remembered as the name (string)."
      ],
      "input_output_parameters": [
        "type: Data type for which the identifier is to be set (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Type_size",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_size(MPI_Datatype datatype, int *size)",
      "description": "MPI_Type_size returns the total size, in bytes, of the entries in the type signature associated with datatype; i.e., the total size of the data in a message that would be created with this datatype. Entries that occur multiple times in the datatype are counted with their multiplicity. For either function, if the size parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "datatype: Datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Datatype size (integer)."
      ]
    },
    {
      "name": "MPI_Type_size_x",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_size_x(MPI_Datatype datatype, MPI_Count *size)",
      "description": "MPI_Type_size_x returns the total size, in bytes, of the entries in the type signature associated with datatype; i.e., the total size of the data in a message that would be created with this datatype. Entries that occur multiple times in the datatype are counted with their multiplicity. For either function, if the size parameter cannot express the value to be returned (e.g., if the parameter is too small to hold the output value), it is set to MPI_UNDEFINED.",
      "input_parameters": [
        "datatype: Datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Datatype size (integer)."
      ]
    },
    {
      "name": "MPI_Type_struct",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_struct(int count, int *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Datatype *array_of_types, MPI_Datatype *newtype)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Use MPI_Type_create_struct instead.",
      "input_parameters": [
        "count: Number of blocks (integer) -- also number of entries in arrays array_of_types, array_of_displacements, and array_of_blocklengths.",
        "array_of_blocklengths: Number of elements in each block (array).",
        "array_of_displacements: Byte displacement of each block (array).",
        "array_of_types: Type of elements in each block (array of handles to datatype objects)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_Type_ub",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_ub(MPI_Datatype datatype, MPI_Aint *displacement)",
      "description": "Note that use of this routine is deprecated as of MPI-2. Please use MPI_Type_get_extent instead.",
      "input_parameters": [
        "datatype: Datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "displacement: Displacement of upper bound from origin, in bytes (integer)."
      ]
    },
    {
      "name": "MPI_Type_vector",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Type_vector(int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype *newtype)",
      "description": "The function MPI_Type_vector is a general constructor that allows replication of a datatype into locations that consist of equally spaced blocks. Each block is obtained by concatenating the same number of copies of the old datatype. The spacing between blocks is a multiple of the extent of the old datatype.",
      "input_parameters": [
        "count: Number of blocks (nonnegative integer).",
        "blocklength: Number of elements in each block (nonnegative integer).",
        "stride: Number of elements between start of each block (integer).",
        "oldtype: Old datatype (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "newtype: New datatype (handle)."
      ]
    },
    {
      "name": "MPI_T_category_changed",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_changed(int *stamp)",
      "description": "If two subsequent calls to this routine return the same timestamp, it is guaranteed that no categories have been changed or added. If the timestamp from the second call is higher, then some categories have been added or changed.",
      "input_parameters": [
        "stamp: A virtual time stamp to indicate the last change to the categories."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_category_get_categories",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_get_categories(int cat_index, int len, int indices[])",
      "description": "MPI_T_category_get_categories can be used to query which other categories are in a category.",
      "input_parameters": [
        "cat_index: Index of the category to be queried.",
        "len: The length of the indices array."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "indices: An integer array of size len, indicating category indices."
      ]
    },
    {
      "name": "MPI_T_category_get_cvars",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_get_cvars(int cat_index, int len, int indices[])",
      "description": "MPI_T_category_get_cvars can be used to query which control variables are contained in a particular category.",
      "input_parameters": [
        "cat_index: Index of the category to be queried.",
        "len: The length of the indices array."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "indices: An integer array of size len, indicating control variable indices."
      ]
    },
    {
      "name": "MPI_T_category_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc, int *desc_len, int *num_cvars, int *num_pvars, int *num_categories)",
      "description": "MPI_T_category_get_info can be used to query information from a category. The function returns the number of control variables, performance variables, and sub-categories in the queried category in the arguments num_cvars, num_pvars, and num_categories, respectively.",
      "input_parameters": [
        "cat_index: Index of the category to be queried."
      ],
      "input_output_parameters": [
        "name_len: Length of the string and/or buffer for name.",
        "desc_len: Length of the string and/or buffer for desc."
      ],
      "output_parameters": [
        "name: Buffer to return the string containing the name of the category.",
        "desc: Buffer to return the string containing the description of the category.",
        "num_cvars: Number of control variables in the category.",
        "num_pvars: Number of performance variables in the category.",
        "num_categories: Number of categories contained in the category."
      ]
    },
    {
      "name": "MPI_T_category_get_num",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_get_num(int *num_cat)",
      "description": "MPI_T_category_get_num can be used to query the current number of categories.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "num_cat: Current number of categories."
      ]
    },
    {
      "name": "MPI_T_category_get_pvars",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_category_get_pvars(int cat_index, int len, int indices[])",
      "description": "MPI_T_category_get_pvars can be used to query which performance variables are contained in a particular category. A category contains zero or more performance variables.",
      "input_parameters": [
        "cat_index: Index of the category to be queried.",
        "len: The length of the indices array."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "indices: An integer array of size len, indicating performance variable indices."
      ]
    },
    {
      "name": "MPI_T_cvar_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len, int *verbosity, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, int *scope)",
      "description": "MPI_T_cvar_get_info can be used to query information about a control variable. The function returns the verbosity, datatype, enumeration type, binding, and scope of the queried control variable in the arguments verbosity, datatype, enumtype, bind, and scope, respectively. Control variables in Open MPI are the same as MCA parameters.",
      "input_parameters": [
        "cvar_index: Index of the control variable to be queried."
      ],
      "input_output_parameters": [
        "name_len: Length of the string and/or buffer for name.",
        "desc_len: Length of the string and/or buffer for desc."
      ],
      "output_parameters": [
        "name: Buffer to return the string containing the name of the control variable.",
        "verbosity: Verbosity level of this variable.",
        "datatype: MPI datatype of the information stored in the control variable.",
        "enumtype: Optional descriptor for enumeration information.",
        "desc: Buffer to return the string containing the description of the control variable.",
        "bind: Type of MPI object to which this variable must be bound.",
        "scope: Scope of when changes to this variable are possible."
      ]
    },
    {
      "name": "MPI_T_cvar_get_num",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_get_num(int *num_cvar)",
      "description": "MPI_T_cvar_get_num can be used to query the current number of control variables. The number of control variables may increase throughout the execution of the process but will never decrease.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "num_cvar: Current number of control variables."
      ]
    },
    {
      "name": "MPI_T_cvar_handle_alloc",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_handle_alloc(int cvar_index, void *obj_handle, MPI_T_cvar_handle *handle, int *count)",
      "description": "MPI_T_cvar_handle_alloc binds the control variable specified in cvar_index to the MPI object specified in obj_handle. If MPI_T_cvar_get_info returns MPI_T_BIND_NO_OBJECT as the binding of the variable the obj_handle argument is ignored. The number of values represented by this control variable is returned in the count parameter. If the control variable represents a string then count will be the maximum length of the string.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_cvar_handle_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_handle_free(MPI_T_cvar_handle *handle)",
      "description": "MPI_T_cvar_handle_free frees a handle allocated by MPI_T_cvar_handle_alloc and sets the handle argument to MPI_T_CVAR_HANDLE_NULL.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_cvar_read",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_read(MPI_T_cvar_handle handle, void *buf)",
      "description": "MPI_T_cvar_read reads the value of the control variable identified by the handle specified in handle and stores the value in the buffer pointed to by buf. The caller must ensure that the buffer pointed to by buf is large enough to hold the entire value of the control variable.",
      "input_parameters": [
        "handle: Handle of the control variable to be read.",
        "buf: Initial address of storage location for variable value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_cvar_write",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_cvar_write(MPI_T_cvar_handle handle, const void *buf)",
      "description": "MPI_T_cvar_write sets the value of the control variable identified by the handle specified in handle from the buffer provided in buf. The caller must ensure that the buffer specified in buf is large enough to hold the entire value of the control variable. If the variable has global scope, any write call must be issued on all connected MPI processes.",
      "input_parameters": [
        "handle: Handle of the control variable to be written.",
        "buf: Initial address of storage location for variable value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_enum_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len)",
      "description": "MPI_T_enum_get_info can be used to query information about an enumerator. The function returns the number of discrete values represented by this enumerator in the num parameter.",
      "input_parameters": [
        "enumtype: Enumerator to be queried."
      ],
      "input_output_parameters": [
        "name_len: Length of the string and/or buffer for name."
      ],
      "output_parameters": [
        "num: Number of discrete values represented by this enumeration.",
        "name: Buffer to return the string containing the name of the category."
      ]
    },
    {
      "name": "MPI_T_enum_get_item",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, int *name_len)",
      "description": "MPI_T_enum_get_item can be used to query information about an item in an enumerator. This function returns the enumeration value in the value parameter.",
      "input_parameters": [
        "enumtype: Enumeration to be queried.",
        "index: Number of the value to be queried in this enumeration."
      ],
      "input_output_parameters": [
        "name_len: Length of the string and/or buffer for name."
      ],
      "output_parameters": [
        "value: Variable value.",
        "name: Buffer to return the string containing the name of the category."
      ]
    },
    {
      "name": "MPI_T_finalize",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_finalize(void)",
      "description": "MPI_T_finalize() finalizes the MPI tool information interface and must be called the same number of times as MPI_T_init_thread() by the end of execution. Calls to MPI tool functions are allowed at any point in execution as long as MPI_T_init_thread() has been called at least once and the number of calls to MPI_T_init_thread() is greater than the number of calls to MPI_T_finalize(). If at any point in execution the number of calls to MPI_T_finalize() equals the number of calls to MPI_T_init_thread() the MPI tool interface will no longer be available until another call to MPI_T_init_thread().",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_init_thread",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_init_thread(int required, int *provided)",
      "description": "MPI_T_init_thread() initializes the MPI tool information interface. Calls to MPI tool functions are allowed at any point in execution (including before MPI_Init() and after MPI_Finalize()) as long as MPI_T_init_thread() has been called at least once and the number of calls to MPI_T_init_thread() is greater than the number of calls to MPI_T_finalize(). If at any point in execution the number of calls to MPI_T_finalize() equals the number of calls to MPI_T_init_thread() the MPI tool interface will no longer be available until another call to MPI_T_init_thread().",
      "input_parameters": [
        "required: Desired level of thread support (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "provided: Available level of thread support (integer)."
      ]
    },
    {
      "name": "MPI_T_pvar_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, int *verbosity, int *var_class, MPI_Datatype *datatype, MPI_T_enum *enumtype, char *desc, int *desc_len, int *bind, int *readonly, int *continuous, int *atomic)",
      "description": "MPI_T_pvar_get_info can be used to query information from a performance variable. The function returns the verbosity, class, datatype, enumeration type, and binding of the queried performance variable in the arguments verbosity, var_class, datatype, enumtype, and bind respectively. Flags indicating whether the variable is read-only, continuous, or atomic are returned in readonly, continuous, and atomic accordingly. See MPI-3 § 14.3.7 for more information. See the man page for MPI_T_cvar_get_info for information on variable verbosity.",
      "input_parameters": [
        "pvar_index: Index of the performance variable to be queried."
      ],
      "input_output_parameters": [
        "name_len: Length of the string and/or buffer for name.",
        "desc_len: Length of the string and/or buffer for desc."
      ],
      "output_parameters": [
        "name: Buffer to return the string containing the name of the performance variable.",
        "verbosity: Verbosity level of this variable.",
        "var_class: Class of performance variable.",
        "datatype: MPI datatype of the information stored in the performance variable.",
        "enumtype: Optional descriptor for enumeration information.",
        "desc: Buffer to return the string containing the description of the performance variable.",
        "bind: Type of MPI object to which this variable must be bound.",
        "readonly: Flag indicating whether the variable can be written/reset.",
        "continuous: Flag indicating whether the variable can be started and stopped or is continuously active.",
        "atomic: Flag indicating whether the variable can be atomically read and reset."
      ]
    },
    {
      "name": "MPI_T_pvar_get_num",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_get_num(int *num_pvar)",
      "description": "MPI_T_pvar_get_num can be used to query the current number of performance variables. The number of performance variables may increase throughout the execution of the process but will never decrease.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [
        "num_pvar: Current number of performance variables."
      ]
    },
    {
      "name": "MPI_T_pvar_handle_alloc",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_handle_alloc(int session, int pvar_index, void *obj_handle, MPI_T_pvar_handle *handle, int *count)",
      "description": "MPI_T_pvar_handle_alloc binds the performance variable specified in pvar_index to the MPI object specified in obj_handle in the session identified by the parameter session. The object is passed in the argument obj_handle as an address to a local variable that stores the object’s handle. If MPI_T_pvar_get_info returns MPI_T_BIND_NO_OBJECT as the binding for the variable the obj_handle argument is ignored. The handle allocated to reference the variable is returned in the argument handle. Upon successful return, count contains the number of elements (of the datatype returned by a previous MPI_T_PVAR_GET_INFO call) used to represent this variable.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_handle_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_handle_free(int session, MPI_T_pvar_handle *handle)",
      "description": "MPI_T_pvar_handle_free frees a handle allocated by MPI_T_pvar_handle_alloc and sets the handle argument to MPI_T_PVAR_HANDLE_NULL.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_read",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void *buf)",
      "description": "MPI_T_pvar_read queries the value of a performance variable bound to the handle specified by handle in the session specified by session. The result is stored in the buffer pointed to by buf. The caller must ensure that the buffer pointed to by buf is large enough to hold the entire value of the performance variable.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle.",
        "buf: Initial address of storage location for variable value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_readreset",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void *buf)",
      "description": "MPI_T_pvar_readreset atomically queries and resets the value of a performance variable bound to the handle specified by handle in the session specified by session. The result is stored in the buffer pointed to by buf. This function can only be used with performance variables that are atomic and not readonly. The caller must ensure that the buffer pointed to by buf is large enough to hold the entire value of the performance variable.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle.",
        "buf: Initial address of storage location for variable value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_reset",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle)",
      "description": "MPI_T_pvar_reset sets the performance variable specified by the handle in handle to its initial value. The special value MPI_T_PVAR_ALL_HANDLES can be passed in handle to reset all read-write handles in the session specified in session.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle or MPI_T_PVAR_ALL_HANDLES."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_session_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_session_create(MPI_T_pvar_session *session)",
      "description": "MPI_T_pvar_session_create creates a session for accessing performance variables. The new session is returned in the session parameter.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_session_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_session_free(MPI_T_pvar_session *session)",
      "description": "MPI_T_pvar_session_free releases a session allocated by MPI_T_pvar_session_create and sets the session parameter to MPI_T_PVAR_SESSION_NULL.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_start",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle)",
      "description": "MPI_T_pvar_start starts the performance variable with the handle specified in handle. The special value MPI_T_PVAR_ALL_HANDLES can be passed in handle to start all non-continuous handles in the session specified in session.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_stop",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle)",
      "description": "MPI_T_pvar_stop stops the performance variable with the handle specified in handle. The special value MPI_T_PVAR_ALL_HANDLES can be passed in handle to stop all non-continuous handles in the session specified in session.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_T_pvar_write",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void *buf)",
      "description": "MPI_T_pvar_write attempts to set the value of the performance variable identified by the handle specified in handle in the session specified in session. The value to be written is specified in buf. The caller must ensure that the buffer specified in buf is large enough to hold the entire value of the performance variable.",
      "input_parameters": [
        "session: Performance experiment session.",
        "handle: Performance variable handle.",
        "buf: Initial address of storage location for variable value."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Unpack",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf, int outcount, MPI_Datatype datatype, MPI_Comm comm)",
      "description": "Unpacks a message into the receive buffer specified by outbuf, outcount, datatype from the buffer space specified by inbuf and insize. The output buffer can be any communication buffer allowed in MPI_Recv. The input buffer is a contiguous storage area containing insize bytes, starting at address inbuf. The input value of position is the first location in the input buffer occupied by the packed message. position is incremented by the size of the packed message, so that the output value of position is the first location in the input buffer after the locations occupied by the message that was unpacked. comm is the communicator used to receive the packed message.",
      "input_parameters": [
        "inbuf: Input buffer start (choice).",
        "insize: Size of input buffer, in bytes (integer).",
        "outcount: Number of items to be unpacked (integer).",
        "datatype: Datatype of each output data item (handle).",
        "comm: Communicator for packed message (handle)."
      ],
      "input_output_parameters": [
        "position: Current position in bytes (integer)."
      ],
      "output_parameters": [
        "outbuf: Output buffer start (choice)."
      ]
    },
    {
      "name": "MPI_Unpack_external",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Unpack_external(const char datarep[], const void *inbuf, MPI_Aint insize, MPI_Aint *position, void *outbuf, int outcount, MPI_Datatype datatype)",
      "description": "MPI_Unpack_external unpacks data from the external32 format, a universal data representation defined by the MPI Forum. This format is useful for exchanging data between MPI implementations, or when writing data to a file.",
      "input_parameters": [
        "datarep: Data Representation (string).",
        "inbuf: Input buffer start (choice).",
        "insize: Size of input buffer, in bytes (integer).",
        "outcount: Number of items to be unpacked (integer).",
        "datatype: Datatype of each output data item (handle)."
      ],
      "input_output_parameters": [
        "position: Current position in buffer, in bytes (integer)."
      ],
      "output_parameters": [
        "outbuf: Output buffer start (choice)."
      ]
    },
    {
      "name": "MPI_Unpublish_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name)",
      "description": "This routine removes the pair (service_name, port_name) so that applications may no longer retrieve port_name by calling MPI_Lookup_name. It is an error to unpublish a service_name that was not published via MPI_Publish_name. Both the service_name and port_name arguments to MPI_Unpublish_name must be identical to the arguments to the previous call to MPI_Publish_name. INFO ARGUMENTS: The following keys for info are recognized:",
      "input_parameters": [
        "service_name: A service name (string).",
        "info: Options to the name service functions (handle).",
        "port_name: A port name (string)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Wait",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Wait(MPI_Request *request, MPI_Status *status)",
      "description": "A call to MPI_Wait returns when the operation identified by request is complete. If the communication object associated with this request was created by a nonblocking send or receive call, then the object is deallocated by the call to MPI_Wait and the request handle is set to MPI_REQUEST_NULL.",
      "input_parameters": [
        "request: Request (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Waitall",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Waitall(int count, MPI_Request array_of_requests[], MPI_Status *array_of_statuses)",
      "description": "Blocks until all communication operations associated with active handles in the list complete, and returns the status of all these operations (this includes the case where no handle in the list is active). Both arrays have the same number of valid entries. The ith entry in array_of_statuses is set to the return status of the ith operation. Requests that were created by nonblocking communication operations are deallocated, and the corresponding handles in the array are set to MPI_REQUEST_NULL. The list may contain null or inactive handles. The call sets to empty the status of each such entry.",
      "input_parameters": [
        "count: List length (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "array_of_statuses: Array of status objects (array of status)."
      ]
    },
    {
      "name": "MPI_Waitany",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Waitany(int count, MPI_Request array_of_requests[], int *index, MPI_Status *status)",
      "description": "A call to MPI_Waitany can be used to wait for the completion of one out of several requests.",
      "input_parameters": [
        "count: List length (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "index: Index of handle for operation that completed (integer). In the range 0 to count-1. In Fortran, the range is 1 to count.",
        "status: Status object (status)."
      ]
    },
    {
      "name": "MPI_Waitsome",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Waitsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[])",
      "description": "Waits until at least one of the operations associated with active handles in the list has completed. Returns in outcount the number of requests from the list array_of_requests that have completed. Returns in the first outcount locations of the array array_of_indices the indices of these operations (index within the array array_of_requests; the array is indexed from 0 in C and from 1 in Fortran). Returns in the first outcount locations of the array array_of_statuses the status for these completed operations. If a request that completed was allocated by a nonblocking communication call, then it is deallocated, and the associated handle is set to MPI_REQUEST_NULL.",
      "input_parameters": [
        "incount: Length of array_of_requests (integer).",
        "array_of_requests: Array of requests (array of handles)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "outcount: Number of completed requests (integer).",
        "array_of_indices: Array of indices of operations that completed (array of integers).",
        "array_of_statuses: Array of status objects for operations that completed (array of status)."
      ]
    },
    {
      "name": "MPI_Win_allocate",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_allocate (MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, void *baseptr, MPI_Win *win)",
      "description": "MPI_Win_allocate is a collective call executed by all processes in the group of comm. On each process, it allocates memory of at least size bytes, returns a pointer to it, and returns a window object that can be used by all processes in comm to perform RMA operations. The returned memory consists of size bytes local to each process, starting at address baseptr and is associated with the window as if the user called MPI_Win_create on existing memory. The size argument may be different at each process and size = 0 is valid; however, a library might allocate and expose more memory in order to create a fast, globally symmetric allocation. The discussion of and rationales for MPI_Alloc_mem and MPI_Free_mem in MPI-3.1 § 8.2 also apply to MPI_Win_allocate; in particular, see the rationale in MPI-3.1 § 8.2 for an explanation of the type used for baseptr.",
      "input_parameters": [
        "size: Size of window in bytes (nonnegative integer).",
        "disp_unit: Local unit size for displacements, in bytes (positive integer).",
        "info: Info argument (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "baseptr: Initial address of window.",
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_allocate_shared",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_allocate_shared (MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, void *baseptr, MPI_Win *win)",
      "description": "MPI_Win_allocate_shared is a collective call executed by all processes in the group of comm. On each process, it allocates memory of at least size bytes that is shared among all processes in comm, and returns a pointer to the locally allocated segment in baseptr that can be used for load/store accesses on the calling process.",
      "input_parameters": [
        "size: Size of window in bytes (nonnegative integer).",
        "disp_unit: Local unit size for displacements, in bytes (positive integer).",
        "info: Info argument (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "baseptr: Initial address of window.",
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_attach",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size)",
      "description": "MPI_Win_attach is a one-sided MPI communication call used to attach a memory region of size bytes starting at address base to a window for RMA access. The window win must have been created using MPI_Win_create_dynamic.",
      "input_parameters": [
        "win: A window that was created with MPI_Win_create_dynamic.",
        "base: Initial address of window (choice).",
        "size: Size of window in bytes (nonnegative integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_c2f",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Fint MPI_Win_c2f(MPI_Win win)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_call_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_call_errhandler(MPI_Win win, int errorcode)",
      "description": "This function invokes the error handler assigned to the window win with the supplied error code errorcode. If the error handler was successfully called, the process is not aborted, and the error handler returns, this function returns MPI_SUCCESS.",
      "input_parameters": [
        "win: Window with error handler (handle).",
        "errorcode: MPI error code (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_complete",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_complete(MPI_Win win)",
      "description": "MPI_Win_complete is a one-sided MPI communication synchronization call, completing an RMA access epoch on win started by a call to MPI_Win_start. MPI_Win_complete enforces the completion of preceding RMA calls at the origin and not at the target. A put or accumulate call may not have completed at the target when it has completed at the origin.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_create",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, MPI_Win *win)",
      "description": "MPI_Win_create is a one-sided MPI communication collective call executed by all processes in the group of comm. It returns a window object that can be used by these processes to perform RMA operations. Each process specifies a window of existing memory that it exposes to RMA accesses by the processes in the group of comm. The window consists of size bytes, starting at address base. A process may elect to expose no memory by specifying size = 0.",
      "input_parameters": [
        "base: Initial address of window (choice).",
        "size: Size of window in bytes (nonnegative integer).",
        "disp_unit: Local unit size for displacements, in bytes (positive integer).",
        "info: Info argument (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_create_dynamic",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win)",
      "description": "MPI_Win_create_dynamic is a one-sided MPI communication collective call executed by all processes in the group of comm. It returns a window object without memory attached that can be used by these processes to perform RMA operations.",
      "input_parameters": [
        "info: Info argument (handle).",
        "comm: Communicator (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_create_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_create_errhandler(MPI_Win_errhandler_function *function, MPI_Errhandler *errhandler)",
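      "description": "MPI_Win_create_errhandler creates an error handler that can be attached to windows. The user routine function should be, in C, a function of type MPI_Win_errhandler_function, which is defined as typedef void MPI_Win_errhandler_function(MPI_Win *, int *, ...).",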
      "input_parameters": [
        "function: User-defined error-handling procedure (function)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: MPI error handler (handle)."
      ]
    },
    {
      "name": "MPI_Win_create_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn, MPI_Win_delete_attr_function *win_delete_attr_fn, int *win_keyval, void *extra_state)",
      "description": "The argument win_copy_attr_fn may be specified as MPI_WIN_NULL_COPY_FN or MPI_WIN_DUP_FN from either C, C++, or Fortran. MPI_WIN_NULL_COPY_FN is a function that serves only to return flag = 0 and MPI_SUCCESS. MPI_WIN_DUP_FN is a simple-minded copy function that sets flag = 1, returns the value of attribute_val_in in attribute_val_out, and returns MPI_SUCCESS.",
      "input_parameters": [
        "win_copy_attr_fn: Copy callback function for win_keyval (function).",
        "win_delete_attr_fn: Delete callback function for win_keyval (function).",
        "extra_state: Extra state for callback functions."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win_keyval: Key value for future access (integer)."
      ]
    },
    {
      "name": "MPI_Win_delete_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_delete_attr(MPI_Win win, int win_keyval)",
      "description": "MPI_Win_delete_attr deletes the attribute identified by win_keyval from the window win.",
      "input_parameters": [
        "win_keyval: Key value (integer)."
      ],
      "input_output_parameters": [
        "win: Window from which the attribute is deleted (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_detach",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_detach(MPI_Win win, void *base)",
      "description": "MPI_Win_detach can be used to detach a previously attached memory region from win. The memory address base and win must match arguments passed to a previous call to MPI_Win_attach.",
      "input_parameters": [
        "win: A window that was created with MPI_Win_create_dynamic.",
        "base: Initial address of window (choice).",
        "size: Size of window in bytes (nonnegative integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win: Window object returned by the call (handle)."
      ]
    },
    {
      "name": "MPI_Win_f2c",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Win MPI_Win_f2c(MPI_Fint win)",
      "description": "Handles are passed between Fortran and C or C++ by using an explicit C wrapper to convert Fortran handles to C handles. There is no direct access to C or C++ handles in Fortran. Handles are passed between C and C++ using overloaded C++ operators called from C++ code. There is no direct access to C++ objects from C. The type definition MPI_Fint is provided in C/C++ for an integer of the size that matches a Fortran INTEGER; usually, MPI_Fint will be equivalent to int. The handle translation functions are provided in C to convert from a Fortran handle (which is an integer) to a C handle, and vice versa.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_fence",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_fence(int assert, MPI_Win win)",
      "description": "MPI_Win_fence synchronizes RMA calls on win. The call is collective on the group of win. All RMA operations on win originating at a given process and started before the fence call will complete at that process before the fence call returns. They will be completed at their target before the fence call returns at the target. RMA operations on win started by a process after the fence call returns will access their target window only after MPI_Win_fence has been called by the target process.",
      "input_parameters": [
        "assert: Program assertion (integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_flush",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_flush (int rank, MPI_Win win)",
      "description": "MPI_Win_flush completes all outstanding RMA operations initiated by the calling process to the target rank on the specified window. The operations are completed both at the origin and at the target. MPI_Win_flush_all completes all outstanding RMA operations to all targets.",
      "input_parameters": [
        "rank: Rank of window (nonnegative integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_flush_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_flush_all (MPI_Win win)",
      "description": "MPI_Win_flush completes all outstanding RMA operations initiated by the calling process to the target rank on the specified window. The operations are completed both at the origin and at the target. MPI_Win_flush_all completes all outstanding RMA operations to all targets.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_flush_local",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_flush_local (int rank, MPI_Win win)",
      "description": "MPI_Win_flush_local locally completes at the origin all outstanding RMA operations initiated by the calling process to the target process specified by rank on the specified window. For example, after this routine completes, the user may reuse any buffers provided to put, get, or accumulate operations. MPI_Win_flush_local_all locally completes at the origin all outstanding RMA operations to all targets.",
      "input_parameters": [
        "rank: Rank of window (nonnegative integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_flush_local_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_flush_local_all (MPI_Win win)",
      "description": "MPI_Win_flush_local locally completes at the origin all outstanding RMA operations initiated by the calling process to the target process specified by rank on the specified window. For example, after this routine completes, the user may reuse any buffers provided to put, get, or accumulate operations. MPI_Win_flush_local_all locally completes at the origin all outstanding RMA operations to all targets.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_free",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_free(MPI_Win *win)",
      "description": "MPI_Win_free frees the window object win and returns a null handle (equal to MPI_WIN_NULL). This collective call is executed by all processes in the group associated with win. It can be invoked by a process only after it has completed its involvement in RMA communications on window win, that is, the process has called MPI_Win_fence, or called MPI_Win_unlock to match a previous call to MPI_Win_lock. When the call returns, the window memory can be freed.",
      "input_parameters": [],
      "input_output_parameters": [
        "win: Window object (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_free_keyval",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_free_keyval(int *win_keyval)",
      "description": "",
      "input_parameters": [],
      "input_output_parameters": [
        "win_keyval: Key value (integer)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_get_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_get_attr(MPI_Win win, int win_keyval, void *attribute_val, int *flag)",
      "description": "Obtains the value of a window attribute.",
      "input_parameters": [
        "win: Window to which the attribute is attached (handle).",
        "win_keyval: Key value (integer)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "attribute_val: Attribute value, unless ag = false",
        "flag: False if no attribute is associated with the key (logical)."
      ]
    },
    {
      "name": "MPI_Win_get_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler)",
      "description": "MPI_Win_get_errhandler retrieves the error handler currently associated with a window.",
      "input_parameters": [
        "win: Window (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "errhandler: Error handler currently associated with window (handle)."
      ]
    },
    {
      "name": "MPI_Win_get_group",
      "headerfile_desc": "mpi.h",
      "func_name": "MPI_Win_get_group(MPI_Win win, MPI_Group *group)",
      "description": "MPI_Win_get_group returns a duplicate of the group of the communicator used to create the window associated with win. The group is returned in group.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "group: Group of processes that share access to the window (handle)."
      ]
    },
    {
      "name": "MPI_Win_get_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used)",
      "description": "MPI_Win_get_info returns a new info object containing the hints ofthe window associated with win.",
      "input_parameters": [
        "win: Window from which to receive active info hints"
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "info_used: New info object returned with all active hints on this window."
      ]
    },
    {
      "name": "MPI_Win_get_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen)",
      "description": "",
      "input_parameters": [
        "win: Window whose name is to be returned (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "win_name: the name previously stored on the window, or an empty string if no such name exists (string).",
        "resultlen: Length of returned name (integer)."
      ]
    },
    {
      "name": "MPI_Win_lock",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)",
      "description": "Starts an RMA access epoch. Locks ensure that only the windows created by specific processes can be accessed by those processes (and by no other processes) during that epoch.",
      "input_parameters": [
        "lock_type: Either MPI_LOCK_EXCLUSIVE or MPI_LOCK_SHARED (state).",
        "rank: Rank of locked window (nonnegative integer).",
        "assert: Program assertion (integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_lock_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_lock_all(int assert, MPI_Win win)",
      "description": "Starts an RMA access epoch to all processes in win, with a lock type of MPI_LOCK_SHARED. During the epoch, the calling process can access the window memory on all processes in win by using RMA operations. A window locked with MPI_Win_lock_all must be unlocked with MPI_Win_unlock_all. This routine is not collective — the ALL refers to a lock on all members of the group of the window.",
      "input_parameters": [
        "assert: Program assertion (integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_post",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_post(MPI_Group group, int assert, MPI_Win win)",
      "description": "Starts an RMA exposure epoch for the local window associated with win. Only the processes belonging to group should access the window with RMA calls on win during this epoch. Each process in group must issue a matching call to MPI_Win_start. MPI_Win_post does not block.",
      "input_parameters": [
        "group: The group of origin processes (handle)",
        "assert: Program assertion (integer)",
        "win: Window object (handle)"
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_set_attr",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val)",
      "description": "",
      "input_parameters": [
        "win_keyval: Key value (integer).",
        "attribute_val: Attribute value."
      ],
      "input_output_parameters": [
        "win: Window to which attribute will be attached (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_set_errhandler",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler)",
      "description": "MPI_Win_set_errhandler attaches a new error handler to a window. The error handler must be either a predefined error handler or an error handler created by a call to MPI_Win_create_errhandler.",
      "input_parameters": [
        "errhandler: New error handler for window (handle)."
      ],
      "input_output_parameters": [
        "win: Window (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_set_info",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_set_info(MPI_Win win, MPI_Info info)",
      "description": "MPI_WIN_SET_INFO sets new values for the hints of the windowassociated with win.",
      "input_parameters": [
        "win: Window on which to set info hints",
        "info: Info object containing hints to be set onI win"
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_set_name",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_set_name(MPI_Win win, const char *win_name)",
      "description": "",
      "input_parameters": [
        "win_name: The character string used as the name (string)."
      ],
      "input_output_parameters": [
        "win: Window whose identifier is to be set (handle)."
      ],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_shared_query",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_shared_query (MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr)",
      "description": "MPI_Win_shared_query queries the process-local address forremote memory segments created with MPI_Win_allocate_shared. Thisfunction can return different process-local addresses for the samephysical memory on different processes.",
      "input_parameters": [
        "win: Shared memory window object (handle).",
        "rank: Rank in the group of window win (non-negative integer) or MPI_PROC_NULL."
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "size: Size of the window segment (non-negative integer).",
        "disp_unit: Local unit size for displacements, in bytes (positive integer).",
        "baseptr: Address for load/store access to window segment (choice)."
      ]
    },
    {
      "name": "MPI_Win_start",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_start(MPI_Group group, int assert, MPI_Win win)",
      "description": "MPI_Win_start is a one-sided MPI communication synchronization call that starts an RMA access epoch for win. RMA calls issued on win during this epoch mustaccess only windows at processes in group. Each process in group must issue a matchingcall to MPI_Win_post. MPI_Win_startis allowed to block until the corresponding MPI_Win_post calls have been executed, but is not required to.",
      "input_parameters": [
        "group: The group of target processes (handle).",
        "assert: Program assertion (integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_sync",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_sync (MPI_Win win)",
      "description": "MPI_Win_sync synchronizes the private and public window copies of win. For the purposes of synchronizing the private and public window, MPI_Win_sync has the effect of ending and reopening an access and exposure epoch on the window (note that it does not actually end an epoch or complete any pending MPI RMA operations).",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_test",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_test(MPI_Win win, int *flag)",
      "description": "MPI_Win_test is a one-sided MPI communication synchronization call, anonblocking version of MPI_Win_wait. It returns flag = true ifMPI_Win_wait would return, flag = false otherwise. The effect of return of MPI_Win_test with flag = true is the same as the effect of a return of MPI_Win_wait. If flag = false is returned, then the call has no visible effect.",
      "input_parameters": [
        "win: Window object (handle)"
      ],
      "input_output_parameters": [],
      "output_parameters": [
        "flag: The returning state of the test for epoch closure."
      ]
    },
    {
      "name": "MPI_Win_unlock",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_unlock(int rank, MPI_Win win)",
      "description": "MPI_Win_unlock completes an RMA access epoch started by a call to MPI_Win_lock. RMA operations issued during this period will have completed both at the origin and at the target when the call returns.",
      "input_parameters": [
        "rank: Rank of window (nonnegative integer).",
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_unlock_all",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_unlock_all(MPI_Win win)",
      "description": "MPI_Win_unlock_all completes an RMA access epoch started by a call to MPI_Win_lock_all. RMA operations issued during this period will have completed both at the origin and at the target when the call returns.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Win_wait",
      "headerfile_desc": "mpi.h",
      "func_name": "int MPI_Win_wait(MPI_Win win)",
      "description": "MPI_Win_wait is a one-sided MPI communication synchronization call that completes an RMA exposure epoch started by a call to MPI_Win_post on win. Thiscall matches calls to MPI_Win_complete(win) issued by each of the processes thatwere granted access to the window during this epoch. The call to MPI_Win_wait blocksuntil all matching calls to MPI_Win_complete have occurred. This guarantees that allthese origin processes have completed their RMA accesses to the local window. When thecall returns, all these RMA accesses will have completed at the target window.",
      "input_parameters": [
        "win: Window object (handle)."
      ],
      "input_output_parameters": [],
      "output_parameters": []
    },
    {
      "name": "MPI_Wtick",
      "headerfile_desc": "mpi.h",
      "func_name": "double MPI_Wtick()",
      "description": "MPI_Wtick returns the resolution of MPI_Wtime in seconds. That is, itreturns, as a double-precision value, the number of seconds betweensuccessive clock ticks. For example, if the clock is implemented bythe hardware as a counter that is incremented every millisecond, thevalue returned by MPI_Wtick should be 10^-3.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [],
      "return": [
        "Time in seconds of resolution of MPI_Wtime."
      ]
    },
    {
      "name": "MPI_Wtime",
      "headerfile_desc": "mpi.h",
      "func_name": "double MPI_Wtime()",
      "description": "MPI_Wtime returns a floating-point number of seconds, representing elapsed wall-clock time since some time in the past.",
      "input_parameters": [],
      "input_output_parameters": [],
      "output_parameters": [],
      "return": [
        "Time in seconds since an arbitrary time in the past."
      ]
    }
  ],
  "DPAK_FUNCTION": [
    {
      "name": "hwoff_global_del_vxlan_vtep",
      "func_name": "int hwoff_global_del_vxlan_vtep(uint32_t vxlan_dstip)",
      "description": "Delete the VXLAN IP address from the driver.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "vxlan_dstip: IP address of the VXLAN"
      ],
      "return": [
        "0: Success",
        "other values: Failure"
      ]
    },
    {
      "name": "hwoff_global_add_vxlan_vtep",
      "func_name": "int hwoff_global_add_vxlan_vtep(uint32_t vxlan_dstip)",
      "description": "Set the VXLAN IP address to the driver.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "vxlan_dstip: IP address of the VXLAN"
      ],
      "return": [
        "0: Success",
        "other values: Failure"
      ]
    },
    {
      "name": "hwoff_set_qos",
      "func_name": "void hwoff_set_qos(uint16_t port_id, const char *type, const struct smap *details)",
      "description": "Set the egress bandwidth limit of a port.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "port_id: Port ID",
        "type: Rate limit type ",
        "details: Rate limit value"
      ],
      "return": ""
    },
    {
      "name": "hwoff_set_ingress_policing",
      "func_name": "void hwoff_set_ingress_policing(uint16_t port_id, uint32_t policer_rate, uint32_t policer_burst)",
      "description": "Set the ingress bandwidth limit of a port.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "port_id: Port ID",
        "policer_rate: Average bandwidth",
        "policer_burst: Instantaneous bandwidth"
      ],
      "return": ""
    },
    {
      "name": "hwoff_set_module_log_level",
      "func_name": "int hwoff_set_module_log_level(const char *module, uint32_t level)",
      "description": "Set the log level of each module.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "module: Module name",
        "level: Log level. The default value 0 indicates logging is disabled"
      ],
      "return": [
        "0: Success",
        "other values: Failure"
      ]
    },
    {
      "name": "hwoff_rte_flow_deleted_get",
      "func_name": "bool hwoff_rte_flow_deleted_get(struct rte_flow *flow)",
      "description": "Obtain the deletion status of rte_flow.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "flow: Flow pointer"
      ],
      "return": [
        "true or false"
      ]
    },
    {
      "name": "hwoff_rte_flow_deleted_set",
      "func_name": "void hwoff_rte_flow_deleted_set(struct rte_flow *flow, bool flag)",
      "description": "Set the deletion status of rte_flow.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "flow: Flow pointer",
        "flag: true or false"
      ],
      "return": ""
    },
    {
      "name": "hwoff_rte_flow_dealloc",
      "func_name": "void hwoff_rte_flow_dealloc(struct rte_flow *flow)",
      "description": "Release the memory of the rte_flow type.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "flow: Flow pointer"
      ],
      "return": ""
    },
    {
      "name": "hwoff_rte_flow_alloc",
      "func_name": "struct rte_flow* hwoff_rte_flow_alloc(const ovs_u128 *ufid, void* flow_data)",
      "description": "Apply for and initialize the memory of the rte_flow type.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "ufid: ufid of the software flow table",
        "flow_data: Pointer of other data associated with the rte_flow"
      ],
      "return": [
        "Non-NULL: Success",
        "NULL: Failure"
      ]
    },
    {
      "name": "hwoff_parse_vf_extra_options",
      "func_name": "int hwoff_parse_vf_extra_options(uint16_t dpdk_port_id, const struct smap *port_config)",
      "description": "Parse the extra parameters transferred to the VF.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "dpdk_port_id: Port ID of rte_ethdev",
        "port_config: Extra parameters transferred to the VF"
      ],
      "return": [
        "0: Success",
        "Other values: Failure"
      ]
    },
    {
      "name": "hwoff_set_offload_state",
      "func_name": "void hwoff_set_offload_state(hwoff_offload_state_t offload)",
      "description": "Set the NIC offload status.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "offload: NIC offload status"
      ],
      "return": ""
    },
    {
      "name": "hwoff_dp_hook_entry",
      "func_name": "void hwoff_dp_hook_entry(struct hwoff_dp_hook_arg *arg)",
      "description": "Entry function of the hook mounted on the datapath packet processing path.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "arg"
      ],
      "return": ""
    },
    {
      "name": "hwoff_parse_ovs_other_config",
      "func_name": "void hwoff_parse_ovs_other_config(const struct smap *ovs_config)",
      "description": "Parse the Open_vswitch.other_config parameters.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "ovs_config: Open_vswitch other_config configuration information"
      ],
      "return": ""
    },
    {
      "name": "hwoff_tnl_get_src_port",
      "func_name": "uint16_t hwoff_tnl_get_src_port(const struct dp_packet *one_pkt)",
      "description": "The source port of the outer VXLAN is generated based on the RSS hash of inner packets.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "one_pkt"
      ],
      "return": [
        "Source port, in network byte order."
      ]
    },
    {
      "name": "hwoff_is_support_offload",
      "func_name": "bool hwoff_is_support_offload(const struct netdev *netdev)",
      "description": "Check whether offload is supported.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: Device interface"
      ],
      "return": [
        "true: Supported",
        "false: Not supported"
      ]
    },
    {
      "name": "hwoff_is_ethdev",
      "func_name": "bool hwoff_is_ethdev(const struct netdev *netdev)",
      "description": "Verify the hW eth device.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device"
      ],
      "return": [
        "true: Success",
        "false: Failure"
      ]
    },
    {
      "name": "hwoff_is_hiovs_netdev",
      "func_name": "bool hwoff_is_hiovs_netdev(const struct netdev *netdev)",
      "description": "hW NIC device verification.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device"
      ],
      "return": [
        "true: Success",
        "false: Failure"
      ]
    },
    {
      "name": "hwoff_get_eth_vport_id",
      "func_name": "uint32_t hwoff_get_eth_vport_id(struct netdev *netdev)",
      "description": "Obtain the vport_id of the eth_dev device.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device"
      ],
      "return": [
        "0: Invalid port ID",
        "Other values: Valid port ID"
      ]
    },
    {
      "name": "hwoff_rte_flow_destroy",
      "func_name": "int hwoff_rte_flow_destroy(struct netdev *netdev, struct rte_flow *rte_flow, struct rte_flow_error *error)",
      "description": "Delete the rte_flow interface from netdev_flow_api.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device",
        "rte_flow : Software flow table",
        "error : Error statistics"
      ],
      "return": [
        "0: Success",
        "Other: Failure"
      ]
    },
    {
      "name": "hwoff_rte_flow_create",
      "func_name": "struct rte_flow *hwoff_rte_flow_create(struct netdev *netdev, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions, struct rte_flow_error *error)",
      "description": "netdev_flow_api creates the rte_flow interface.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device",
        "attr: Flow rule attribute",
        "items: Flow table matching mode item",
        "actions: Flow table action",
        "error : Error statistics"
      ],
      "return": [
        "Non-NULL: Success",
        "NULL: Failure"
      ]
    },
    {
      "name": "hwoff_rte_flow_query_count",
      "func_name": "int hwoff_rte_flow_query_count(struct netdev *netdev, struct rte_flow *rte_flow, struct rte_flow_query_count *query, struct rte_flow_error *error)",
      "description": "netdev_flow_api queries the statistics interface.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "netdev: NIC device",
        "rte_flow : Software flow table",
        "query : Flow table statistics",
        "error : Error statistics"
      ],
      "return": [
        "0: Success",
        "Other values: Failure"
      ]
    },
    {
      "name": "hwoff_agent_destruct",
      "func_name": "void hwoff_agent_destruct(void *aux)",
      "description": "Deinitialize the offload module.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "aux: Set this parameter to NULL"
      ],
      "return": ""
    },
    {
      "name": "hwoff_agent_construct",
      "func_name": "int hwoff_agent_construct(const struct smap *ovs_other_config, const char *pf_pci_addr_str, int pmd_nums,  hwoff_rte_pktmbuf_init_cb cb_func)",
      "description": "Initialize the offload module.",
      "headerfile_desc": "dpak_ovs.h",
      "parameter": [
        "pf_pci_addr_str: PCI address of the PF",
        "pmd_nums: Number of pmd threads",
        "cb_func: rte_pktmbuf_init callback"
      ],
      "return": [
        "0: Success",
        "Other values: Failure"
      ]
    }
  ],
  "AVX_fUNCTION": [
    {
        "name": "_mm_hadd_epi16",
        "full_name": "__m128i _mm_hadd_epi16(__m128i a, __m128i b);",
        "description": "Horizontally add adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hadd_epi32",
        "full_name": "__m128i _mm_hadd_epi32(__m128i a, __m128i b);",
        "description": "Horizontally add adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_hsub_epi16",
        "full_name": "__m128i _mm_hsub_epi16(__m128i a, __m128i b);",
        "description": "Horizontally subtract adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hsub_epi32",
        "full_name": "__m128i _mm_hsub_epi32(__m128i a, __m128i b);",
        "description": "Horizontally subtract adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_sign_pi8",
        "full_name": "__m64 _mm_sign_pi8(__m64 a, __m64 b);",
        "description": "Negate packed 8-bit integers in \"a\" when the corresponding signed 8-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_sign_pi16",
        "full_name": "__m64 _mm_sign_pi16(__m64 a, __m64 b);",
        "description": "Negate packed 16-bit integers in \"a\" when the corresponding signed 16-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_sign_pi32",
        "full_name": "__m64 _mm_sign_pi32(__m64 a, __m64 b);",
        "description": "Negate packed 32-bit integers in \"a\" when the corresponding signed 32-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_sign_epi16",
        "full_name": "__m128i _mm_sign_epi16(__m128i a, __m128i b);",
        "description": "Negate packed 16-bit integers in \"a\" when the corresponding signed 16-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_sign_epi32",
        "full_name": "__m128i _mm_sign_epi32(__m128i a, __m128i b);",
        "description": "Negate packed 32-bit integers in \"a\" when the corresponding signed 32-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_sign_epi8",
        "full_name": "__m128i _mm_sign_epi8(__m128i a, __m128i b);",
        "description": "Negate packed 8-bit integers in \"a\" when the corresponding signed 8-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_hadd_pi16",
        "full_name": "__m64 _mm_hadd_pi16(__m64 a, __m64 b);",
        "description": "Horizontally add adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hadd_pi32",
        "full_name": "__m64 _mm_hadd_pi32(__m64 a, __m64 b);",
        "description": "Horizontally add adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_maddubs_epi16",
        "full_name": "__m128i _mm_maddubs_epi16(__m128i a, __m128i b);",
        "description": "Vertically multiply each unsigned 8-bit integer from \"a\" with the corresponding signed 8-bit integer from \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\"."
    },
    {
        "name": "_mm_hadds_pi16",
        "full_name": "__m64 _mm_hadds_pi16(__m64 a, __m64 b);",
        "description": "Horizontally add adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hadds_epi16",
        "full_name": "__m128i _mm_hadds_epi16(__m128i a, __m128i b);",
        "description": "Horizontally add adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hadds_epi16",
        "full_name": "__m256i _mm256_hadds_epi16(__m256i a, __m256i b);",
        "description": "Horizontally add adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hsubs_epi16",
        "full_name": "__m128i _mm_hsubs_epi16(__m128i a, __m128i b);",
        "description": "Horizontally subtract adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hsubs_epi16",
        "full_name": "__m256i _mm256_hsubs_epi16(__m256i a, __m256i b);",
        "description": "Horizontally subtract adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hsubs_pi16",
        "full_name": "__m64 _mm_hsubs_pi16(__m64 a, __m64 b);",
        "description": "Horizontally subtract adjacent pairs of signed 16-bit integers in \"a\" and \"b\" using saturation, and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_mulhrs_epi16",
        "full_name": "__m128i _mm_mulhrs_epi16(__m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\"."
    },
    {
        "name": "_mm_mulhrs_pi16",
        "full_name": "__m64 _mm_mulhrs_pi16(__m64 a, __m64 b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\"."
    },
    {
        "name": "_mm_popcnt_u32",
        "full_name": "int _mm_popcnt_u32(unsigned int a);",
        "description": "Count the number of bits set to 1 in unsigned 32-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_popcnt_u64",
        "full_name": "__int64 _mm_popcnt_u64(unsigned __int64 a);",
        "description": "Count the number of bits set to 1 in unsigned 64-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_cvtpd_pi32",
        "full_name": "__m64 _mm_cvtpd_pi32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtpd_epi32",
        "full_name": "__m128i _mm_cvtpd_epi32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_div_epi8",
        "full_name": "__m128i _mm_div_epi8(__m128i a, __m128i b);",
        "description": "Divide packed signed 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_div_epi16",
        "full_name": "__m128i _mm_div_epi16(__m128i a, __m128i b);",
        "description": "Divide packed signed 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_div_epu8",
        "full_name": "__m128i _mm_div_epu8(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_div_epu16",
        "full_name": "__m128i _mm_div_epu16(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_sll_epi32",
        "full_name": "__m128i _mm_sll_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sll_epi64",
        "full_name": "__m128i _mm_sll_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_epi64",
        "full_name": "__m128i _mm_cmpeq_epi64(__m128i a, __m128i b);",
        "description": "Compare packed 64-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_move_ss",
        "full_name": "__m128 _mm_move_ss(__m128 a, __m128 b);",
        "description": "Move the lower single-precision (32-bit) floating-point element from \"b\" to the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_move_sd",
        "full_name": "__m128d _mm_move_sd(__m128d a, __m128d b);",
        "description": "Move the lower double-precision (64-bit) floating-point element from \"b\" to the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_testz_si128",
        "full_name": "int _mm_testz_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm_extract_ps",
        "full_name": "int _mm_extract_ps(__m128 a, const int imm8);",
        "description": "Extract a single-precision (32-bit) floating-point element from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_crc32_u8",
        "full_name": "unsigned int _mm_crc32_u8(unsigned int crc, unsigned char v);",
        "description": "Starting with the initial value in \"crc\", accumulates a CRC32 value for unsigned 8-bit integer \"v\", and stores the result in \"dst\"."
    },
    {
        "name": "_mm_crc32_u16",
        "full_name": "unsigned int _mm_crc32_u16(unsigned int crc, unsigned short v);",
        "description": "Starting with the initial value in \"crc\", accumulates a CRC32 value for unsigned 16-bit integer \"v\", and stores the result in \"dst\"."
    },
    {
        "name": "_mm_crc32_u32",
        "full_name": "unsigned int _mm_crc32_u32(unsigned int crc, unsigned int v);",
        "description": "Starting with the initial value in \"crc\", accumulates a CRC32 value for unsigned 32-bit integer \"v\", and stores the result in \"dst\"."
    },
    {
        "name": "_mm_crc32_u64",
        "full_name": "unsigned __int64 _mm_crc32_u64(unsigned __int64 crc, unsigned __int64 v);",
        "description": "Starting with the initial value in \"crc\", accumulates a CRC32 value for unsigned 64-bit integer \"v\", and stores the result in \"dst\"."
    },
    {
        "name": "_mm_set_pd",
        "full_name": "__m128d _mm_set_pd(double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set1_epi64x",
        "full_name": "__m128i _mm_set1_epi64x(int64_t a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\". This intrinsic may generate \"vpbroadcastq\"."
    },
    {
        "name": "_mm_set1_pd",
        "full_name": "__m128d _mm_set1_pd(double a);",
        "description": "Broadcast double-precision (64-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set_epi32",
        "full_name": "__m128i _mm_set_epi32(int e3, int e2, int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set_epi64x",
        "full_name": "__m128i _mm_set_epi64x(int64_t e1, int64_t e0);",
        "description": "Set packed 64-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_cmpestri",
        "full_name": "int _mm_cmpestri(__m128i a, int la, __m128i b, int lb, const int imm8);",
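        "description": "Compare packed strings in \"a\" and \"b\" with lengths \"la\" and \"lb\" using the control in \"imm8\", and store the generated index in \"dst\".\n\t\"imm8\" selects the element format (unsigned or signed, 8-bit or 16-bit), the comparison mode (equal any, ranges, equal each, or equal ordered), the result polarity, and whether the least or most significant matching index is returned."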
    },
    {
        "name": "_mm_cmpestrm",
        "full_name": "__m128i _mm_cmpestrm(__m128i a, int la, __m128i b, int lb, const int imm8);",
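        "description": "Compare packed strings in \"a\" and \"b\" with lengths \"la\" and \"lb\" using the control in \"imm8\", and store the generated mask in \"dst\".\n\t\"imm8\" selects the element format (unsigned or signed, 8-bit or 16-bit), the comparison mode (equal any, ranges, equal each, or equal ordered), the result polarity, and whether the result is returned as a bit mask or as a byte/word mask."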
    },
    {
        "name": "_mm_insert_epi16",
        "full_name": "__m128i _mm_insert_epi16(__m128i a, int i, const int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the 16-bit integer \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_load_epi32",
        "full_name": "__m128i _mm_load_epi32(void const * mem_addr);",
        "description": "Load 128-bits (composed of 4 packed 32-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_load_epi64",
        "full_name": "__m128i _mm_load_epi64(void const * mem_addr);",
        "description": "Load 128-bits (composed of 2 packed 64-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_load_pd",
        "full_name": "__m128d _mm_load_pd(double const * mem_addr);",
        "description": "Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_load_ps",
        "full_name": "__m128 _mm_load_ps(float const * mem_addr);",
        "description": "Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_epi32",
        "full_name": "void _mm_store_epi32(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 4 packed 32-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_epi64",
        "full_name": "void _mm_store_epi64(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 2 packed 64-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_pd",
        "full_name": "void _mm_store_pd(double *mem_addr, __m128d a);",
        "description": "Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_ps",
        "full_name": "void _mm_store_ps(float *mem_addr, __m128 a);",
        "description": "Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_set1_epi8",
        "full_name": "__m128i _mm_set1_epi8(char w);",
        "description": "Broadcast 8-bit integer \"w\" to all elements of \"dst\". This intrinsic may generate \"vpbroadcastb\"."
    },
    {
        "name": "_mm_set1_epi16",
        "full_name": "__m128i _mm_set1_epi16(short a);",
        "description": "Broadcast 16-bit integer \"a\" to all elements of \"dst\". This intrinsic may generate \"vpbroadcastw\"."
    },
    {
        "name": "_mm_set1_epi32",
        "full_name": "__m128i _mm_set1_epi32(int _i);",
        "description": "Broadcast 32-bit integer \"_i\" to all elements of \"dst\". This intrinsic may generate \"vpbroadcastd\"."
    },
    {
        "name": "_mm_set1_epi64",
        "full_name": "__m128i _mm_set1_epi64(__m64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set1_ps",
        "full_name": "__m128 _mm_set1_ps(float a);",
        "description": "Broadcast single-precision (32-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_cmpeq_epi8",
        "full_name": "__m128i _mm_cmpeq_epi8(__m128i a, __m128i b);",
        "description": "Compare packed 8-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_epi32",
        "full_name": "__m128i _mm_cmpeq_epi32(__m128i a, __m128i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_and_si128",
        "full_name": "__m128i _mm_and_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_or_si128",
        "full_name": "__m128i _mm_or_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise OR of 128 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_xor_si128",
        "full_name": "__m128i _mm_xor_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of 128 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_andnot_si128",
        "full_name": "__m128i _mm_andnot_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise NOT of 128 bits (representing integer data) in \"a\" and then AND with \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_castf32_u32",
        "full_name": "unsigned __int32 _castf32_u32(float a);",
        "description": "Cast from type float to type unsigned __int32 without conversion.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_castu32_f32",
        "full_name": "float _castu32_f32(unsigned __int32 a);",
        "description": "Cast from type unsigned __int32 to type float without conversion.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_castf64_u64",
        "full_name": "unsigned __int64 _castf64_u64(double a);",
        "description": "Cast from type double to type unsigned __int64 without conversion.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_castu64_f64",
        "full_name": "double _castu64_f64(unsigned __int64 a);",
        "description": "Cast from type unsigned __int64 to type double without conversion.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castsi128_ps",
        "full_name": "__m128 _mm_castsi128_ps(__m128i a);",
        "description": "Cast vector of type __m128i to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castsi128_pd",
        "full_name": "__m128d _mm_castsi128_pd(__m128i a);",
        "description": "Cast vector of type __m128i to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castpd_ps",
        "full_name": "__m128 _mm_castpd_ps(__m128d a);",
        "description": "Cast vector of type __m128d to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castpd_si128",
        "full_name": "__m128i _mm_castpd_si128(__m128d a);",
        "description": "Cast vector of type __m128d to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castps_pd",
        "full_name": "__m128d _mm_castps_pd(__m128 a);",
        "description": "Cast vector of type __m128 to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_castps_si128",
        "full_name": "__m128i _mm_castps_si128(__m128 a);",
        "description": "Cast vector of type __m128 to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm_max_epu8",
        "full_name": "__m128i _mm_max_epu8(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_min_epu8",
        "full_name": "__m128i _mm_min_epu8(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_sub_epi8",
        "full_name": "__m128i _mm_sub_epi8(__m128i a, __m128i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_adds_epu8",
        "full_name": "__m128i _mm_adds_epu8(__m128i a, __m128i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_setzero_si128",
        "full_name": "__m128i _mm_setzero_si128();",
        "description": "Return vector of type __m128i with all elements set to zero."
    },
    {
        "name": "_mm_slli_si128",
        "full_name": "__m128i _mm_slli_si128(__m128i a, const int imm8);",
        "description": "Shift \"a\" left by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srli_si128",
        "full_name": "__m128i _mm_srli_si128(__m128i a, const int imm8);",
        "description": "Shift \"a\" right by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_bslli_si128",
        "full_name": "__m128i _mm_bslli_si128(__m128i a, const int imm8);",
        "description": "Shift \"a\" left by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_bsrli_si128",
        "full_name": "__m128i _mm_bsrli_si128(__m128i a, const int imm8);",
        "description": "Shift \"a\" right by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_slli_si256",
        "full_name": "__m256i _mm256_slli_si256(__m256i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" left by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_srli_si256",
        "full_name": "__m256i _mm256_srli_si256(__m256i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" right by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_insert_epi16",
        "full_name": "__m256i _mm256_insert_epi16(__m256i a, __int16 i, const int index);",
        "description": "Copy \"a\" to \"dst\", and insert the 16-bit integer \"i\" into \"dst\" at the location specified by \"index\"."
    },
    {
        "name": "_mm256_insert_epi8",
        "full_name": "__m256i _mm256_insert_epi8(__m256i a, __int8 i, const int index);",
        "description": "Copy \"a\" to \"dst\", and insert the 8-bit integer \"i\" into \"dst\" at the location specified by \"index\"."
    },
    {
        "name": "_mm256_bslli_epi128",
        "full_name": "__m256i _mm256_bslli_epi128(__m256i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" left by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_bsrli_epi128",
        "full_name": "__m256i _mm256_bsrli_epi128(__m256i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" right by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_slli_epi64",
        "full_name": "__m128i _mm_slli_epi64(__m128i a, int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srli_epi64",
        "full_name": "__m128i _mm_srli_epi64(__m128i a, int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsi32_si128",
        "full_name": "__m128i _mm_cvtsi32_si128(int a);",
        "description": "Copy 32-bit integer \"a\" to the lower elements of \"dst\", and zero the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtsi128_si32",
        "full_name": "int _mm_cvtsi128_si32(__m128i a);",
        "description": "Copy the lower 32-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm_packs_epi16",
        "full_name": "__m128i _mm_packs_epi16(__m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_packs_epi32",
        "full_name": "__m128i _mm_packs_epi32(__m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_movemask_ps",
        "full_name": "int _mm_movemask_ps(__m128 a);",
        "description": "Set each bit of mask \"dst\" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in \"a\"."
    },
    {
        "name": "_mm_movemask_epi8",
        "full_name": "int _mm_movemask_epi8(__m128i a);",
        "description": "Create mask from the most significant bit of each 8-bit element in \"a\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_shuffle_epi8",
        "full_name": "__m128i _mm_shuffle_epi8(__m128i a, __m128i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_undefined_si128",
        "full_name": "__m128i _mm_undefined_si128(void);",
        "description": "Return vector of type __m128i with undefined elements."
    },
    {
        "name": "_mm_undefined_pd",
        "full_name": "__m128d _mm_undefined_pd(void);",
        "description": "Return vector of type __m128d with undefined elements."
    },
    {
        "name": "_mm_undefined_ps",
        "full_name": "__m128 _mm_undefined_ps(void);",
        "description": "Return vector of type __m128 with undefined elements."
    },
    {
        "name": "_mm_lfence",
        "full_name": "void _mm_lfence(void);",
        "description": "Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, is globally visible before any load instruction which follows the fence in program order."
    },
    {
        "name": "_mm_sfence",
        "full_name": "void _mm_sfence(void);",
        "description": "Perform a serializing operation on all store-to-memory instructions that were issued prior to this instruction. Guarantees that every store instruction that precedes, in program order, is globally visible before any store instruction which follows the fence in program order."
    },
    {
        "name": "_mm_mfence",
        "full_name": "void _mm_mfence(void);",
        "description": "Perform a serializing operation on all load-from-memory and store-to-memory instructions that were issued prior to this instruction. Guarantees that every memory access that precedes, in program order, the memory fence instruction is globally visible before any memory instruction which follows the fence in program order."
    },
    {
        "name": "_mm_add_epi16",
        "full_name": "__m128i _mm_add_epi16(__m128i a, __m128i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_epi32",
        "full_name": "__m128i _mm_add_epi32(__m128i a, __m128i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_epi64",
        "full_name": "__m128i _mm_add_epi64(__m128i a, __m128i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_epi8",
        "full_name": "__m128i _mm_add_epi8(__m128i a, __m128i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_pd",
        "full_name": "__m128d _mm_add_pd(__m128d a, __m128d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_sd",
        "full_name": "__m128d _mm_add_sd(__m128d a, __m128d b);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_and_pd",
        "full_name": "__m128d _mm_and_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_epi16",
        "full_name": "__m128i _mm_cmpeq_epi16(__m128i a, __m128i b);",
        "description": "Compare packed 16-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_pd",
        "full_name": "__m128d _mm_cmpeq_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_sd",
        "full_name": "__m128d _mm_cmpeq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for equality, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpgt_epi16",
        "full_name": "__m128i _mm_cmpgt_epi16(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_epi32",
        "full_name": "__m128i _mm_cmpgt_epi32(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_epi8",
        "full_name": "__m128i _mm_cmpgt_epi8(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmplt_epi16",
        "full_name": "__m128i _mm_cmplt_epi16(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in \"dst\". Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched."
    },
    {
        "name": "_mm_cmplt_epi32",
        "full_name": "__m128i _mm_cmplt_epi32(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in \"dst\". Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched."
    },
    {
        "name": "_mm_cmplt_epi8",
        "full_name": "__m128i _mm_cmplt_epi8(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in \"dst\". Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched."
    },
    {
        "name": "_mm_div_pd",
        "full_name": "__m128d _mm_div_pd(__m128d a, __m128d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_div_sd",
        "full_name": "__m128d _mm_div_sd(__m128d a, __m128d b);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_max_epi16",
        "full_name": "__m128i _mm_max_epi16(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_min_epi16",
        "full_name": "__m128i _mm_min_epi16(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_move_epi64",
        "full_name": "__m128i _mm_move_epi64(__m128i a);",
        "description": "Copy the lower 64-bit integer in \"a\" to the lower element of \"dst\", and zero the upper element."
    },
    {
        "name": "_mm_mul_pd",
        "full_name": "__m128d _mm_mul_pd(__m128d a, __m128d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mul_sd",
        "full_name": "__m128d _mm_mul_sd(__m128d a, __m128d b);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mullo_epi16",
        "full_name": "__m128i _mm_mullo_epi16(__m128i a, __m128i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_or_pd",
        "full_name": "__m128d _mm_or_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_set_epi64",
        "full_name": "__m128i _mm_set_epi64(__m64 e1, __m64 e0);",
        "description": "Set packed 64-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set_epi16",
        "full_name": "__m128i _mm_set_epi16(short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0);",
        "description": "Set packed 16-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_setr_epi16",
        "full_name": "__m128i _mm_setr_epi16(short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0);",
        "description": "Set packed 16-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setr_epi32",
        "full_name": "__m128i _mm_setr_epi32(int e3, int e2, int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setr_epi64",
        "full_name": "__m128i _mm_setr_epi64(__m64 e1, __m64 e0);",
        "description": "Set packed 64-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setr_pd",
        "full_name": "__m128d _mm_setr_pd(double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_set_pd1",
        "full_name": "__m128d _mm_set_pd1(double a);",
        "description": "Broadcast double-precision (64-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set_sd",
        "full_name": "__m128d _mm_set_sd(double a);",
        "description": "Copy double-precision (64-bit) floating-point element \"a\" to the lower element of \"dst\", and zero the upper element."
    },
    {
        "name": "_mm_setzero_pd",
        "full_name": "__m128d _mm_setzero_pd(void);",
        "description": "Return vector of type __m128d with all elements set to zero."
    },
    {
        "name": "_mm_sub_epi16",
        "full_name": "__m128i _mm_sub_epi16(__m128i a, __m128i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_epi32",
        "full_name": "__m128i _mm_sub_epi32(__m128i a, __m128i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_epi64",
        "full_name": "__m128i _mm_sub_epi64(__m128i a, __m128i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_pd",
        "full_name": "__m128d _mm_sub_pd(__m128d a, __m128d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_sd",
        "full_name": "__m128d _mm_sub_sd(__m128d a, __m128d b);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_xor_pd",
        "full_name": "__m128d _mm_xor_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_f64",
        "full_name": "double _mm_cvtsd_f64(__m128d a);",
        "description": "Copy the lower double-precision (64-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvtsi128_si64",
        "full_name": "__int64 _mm_cvtsi128_si64(__m128i a);",
        "description": "Copy the lower 64-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvtsi128_si64x",
        "full_name": "__int64 _mm_cvtsi128_si64x(__m128i a);",
        "description": "Copy the lower 64-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvtsi64_si128",
        "full_name": "__m128i _mm_cvtsi64_si128(__int64 a);",
        "description": "Copy 64-bit integer \"a\" to the lower element of \"dst\", and zero the upper element."
    },
    {
        "name": "_mm_cvtsi64x_si128",
        "full_name": "__m128i _mm_cvtsi64x_si128(__int64 a);",
        "description": "Copy 64-bit integer \"a\" to the lower element of \"dst\", and zero the upper element."
    },
    {
        "name": "_mm_cmpge_pd",
        "full_name": "__m128d _mm_cmpge_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for greater-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpge_sd",
        "full_name": "__m128d _mm_cmpge_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for greater-than-or-equal, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_add_si64",
        "full_name": "__m64 _mm_add_si64(__m64 a, __m64 b);",
        "description": "Add 64-bit integers \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_comieq_sd",
        "full_name": "int _mm_comieq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for equality, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comige_sd",
        "full_name": "int _mm_comige_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for greater-than-or-equal, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comigt_sd",
        "full_name": "int _mm_comigt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for greater-than, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comile_sd",
        "full_name": "int _mm_comile_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for less-than-or-equal, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comilt_sd",
        "full_name": "int _mm_comilt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for less-than, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comineq_sd",
        "full_name": "int _mm_comineq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for not-equal, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_adds_epi8",
        "full_name": "__m128i _mm_adds_epi8(__m128i a, __m128i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_adds_epi16",
        "full_name": "__m128i _mm_adds_epi16(__m128i a, __m128i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_max_pd",
        "full_name": "__m128d _mm_max_pd(__m128d a, __m128d b);",
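        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values."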
    },
    {
        "name": "_mm_max_sd",
        "full_name": "__m128d _mm_max_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values."
    },
    {
        "name": "_mm_min_pd",
        "full_name": "__m128d _mm_min_pd(__m128d a, __m128d b);",
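        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values."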
    },
    {
        "name": "_mm_min_sd",
        "full_name": "__m128d _mm_min_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values."
    },
    {
        "name": "_mm_adds_epu16",
        "full_name": "__m128i _mm_adds_epu16(__m128i a, __m128i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_adds_epu16",
        "full_name": "__m128i _mm_mask_adds_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_madd_epi16",
        "full_name": "__m128i _mm_madd_epi16(__m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\"."
    },
    {
        "name": "_mm_movemask_pd",
        "full_name": "int _mm_movemask_pd(__m128d a);",
        "description": "Set each bit of mask \"dst\" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in \"a\"."
    },
    {
        "name": "_mm_movepi64_pi64",
        "full_name": "__m64 _mm_movepi64_pi64(__m128i a);",
        "description": "Copy the lower 64-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm_movpi64_epi64",
        "full_name": "__m128i _mm_movpi64_epi64(__m64 a);",
        "description": "Copy the 64-bit integer \"a\" to the lower element of \"dst\", and zero the upper element."
    },
    {
        "name": "_mm_ucomieq_sd",
        "full_name": "int _mm_ucomieq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomige_sd",
        "full_name": "int _mm_ucomige_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomigt_sd",
        "full_name": "int _mm_ucomigt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomile_sd",
        "full_name": "int _mm_ucomile_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomilt_sd",
        "full_name": "int _mm_ucomilt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomineq_sd",
        "full_name": "int _mm_ucomineq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_mul_epu32",
        "full_name": "__m128i _mm_mul_epu32(__m128i a, __m128i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\"."
    },
    {
        "name": "_mm_mul_su32",
        "full_name": "__m64 _mm_mul_su32(__m64 a, __m64 b);",
        "description": "Multiply the low unsigned 32-bit integers from \"a\" and \"b\", and store the unsigned 64-bit result in \"dst\"."
    },
    {
        "name": "_mm_mulhi_epi16",
        "full_name": "__m128i _mm_mulhi_epi16(__m128i a, __m128i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_mulhi_epu16",
        "full_name": "__m128i _mm_mulhi_epu16(__m128i a, __m128i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_pd",
        "full_name": "__m128d _mm_cmpgt_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_sd",
        "full_name": "__m128d _mm_cmpgt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for greater-than, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmple_pd",
        "full_name": "__m128d _mm_cmple_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmple_sd",
        "full_name": "__m128d _mm_cmple_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtsd_ss",
        "full_name": "__m128 _mm_cvtsd_ss(__m128 a, __m128d b);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtpi32_pd",
        "full_name": "__m128d _mm_cvtpi32_pd(__m64 a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtps_epi32",
        "full_name": "__m128i _mm_cvtps_epi32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvttpd_pi32",
        "full_name": "__m64 _mm_cvttpd_pi32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvttpd_epi32",
        "full_name": "__m128i _mm_cvttpd_epi32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvttps_epi32",
        "full_name": "__m128i _mm_cvttps_epi32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtph_ps",
        "full_name": "__m128 _mm_cvtph_ps(__m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtph_ps",
        "full_name": "__m128 _mm_mask_cvtph_ps(__m128 src, __mmask8 k, __m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srl_epi16",
        "full_name": "__m128i _mm_srl_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srl_epi32",
        "full_name": "__m128i _mm_srl_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srl_epi64",
        "full_name": "__m128i _mm_srl_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sll_epi16",
        "full_name": "__m128i _mm_sll_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_slli_epi16",
        "full_name": "__m128i _mm_slli_epi16(__m128i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sra_epi16",
        "full_name": "__m128i _mm_sra_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sra_epi32",
        "full_name": "__m128i _mm_sra_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srai_epi16",
        "full_name": "__m128i _mm_srai_epi16(__m128i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srai_epi32",
        "full_name": "__m128i _mm_srai_epi32(__m128i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmplt_pd",
        "full_name": "__m128d _mm_cmplt_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmplt_ps",
        "full_name": "__m128 _mm_cmplt_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmplt_sd",
        "full_name": "__m128d _mm_cmplt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpnge_pd",
        "full_name": "__m128d _mm_cmpnge_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-greater-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpnge_sd",
        "full_name": "__m128d _mm_cmpnge_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-greater-than-or-equal, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpngt_pd",
        "full_name": "__m128d _mm_cmpngt_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpngt_sd",
        "full_name": "__m128d _mm_cmpngt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-greater-than, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpnle_pd",
        "full_name": "__m128d _mm_cmpnle_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpnle_sd",
        "full_name": "__m128d _mm_cmpnle_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpnlt_pd",
        "full_name": "__m128d _mm_cmpnlt_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpnlt_sd",
        "full_name": "__m128d _mm_cmpnlt_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpneq_sd",
        "full_name": "__m128d _mm_cmpneq_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-equal, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpneq_pd",
        "full_name": "__m128d _mm_cmpneq_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_epi64",
        "full_name": "__m128i _mm_unpackhi_epi64(__m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_epi8",
        "full_name": "__m128i _mm_unpackhi_epi8(__m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_pd",
        "full_name": "__m128d _mm_unpackhi_pd(__m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_epi16",
        "full_name": "__m128i _mm_unpacklo_epi16(__m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_epi32",
        "full_name": "__m128i _mm_unpacklo_epi32(__m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_epi64",
        "full_name": "__m128i _mm_unpacklo_epi64(__m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_epi8",
        "full_name": "__m128i _mm_unpacklo_epi8(__m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_pd",
        "full_name": "__m128d _mm_unpacklo_pd(__m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsi32_sd",
        "full_name": "__m128d _mm_cvtsi32_sd(__m128d a, int b);",
        "description": "Convert the signed 32-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtsi64_sd",
        "full_name": "__m128d _mm_cvtsi64_sd(__m128d a, __int64 b);",
        "description": "Convert the signed 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtsi64x_sd",
        "full_name": "__m128d _mm_cvtsi64x_sd(__m128d a, __int64 b);",
        "description": "Convert the signed 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtss_sd",
        "full_name": "__m128d _mm_cvtss_sd(__m128d a, __m128 b);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_extract_epi16",
        "full_name": "int _mm_extract_epi16(__m128i a, const int imm8);",
        "description": "Extract a 16-bit integer from \"a\", selected with \"imm8\", and store the result in the lower element of \"dst\"."
    },
    {
        "name": "_mm_stream_si128",
        "full_name": "void _mm_stream_si128(__m128i *mem_addr, __m128i a);",
        "description": "Store 128-bits of integer data from \"a\" into memory using a non-temporal memory hint. \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_si128",
        "full_name": "void _mm_store_si128(__m128i *mem_addr, __m128i a);",
        "description": "Store 128-bits of integer data from \"a\" into memory. \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_storeu_si128",
        "full_name": "void _mm_storeu_si128(__m128i *p, __m128i a);",
        "description": "Store 128-bits of integer data from \"a\" into memory.\n\t\"p\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_load_si128",
        "full_name": "__m128i _mm_load_si128(__m128i const * mem_addr);",
        "description": "Load 128-bits of integer data from memory into \"dst\". \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_loadu_si128",
        "full_name": "__m128i _mm_loadu_si128(const __m128i *p);",
        "description": "Load 128-bits of integer data from memory into \"dst\".\n\t\"p\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_srli_epi32",
        "full_name": "__m128i _mm_srli_epi32(__m128i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_slli_epi32",
        "full_name": "__m128i _mm_slli_epi32(__m128i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_subs_epi16",
        "full_name": "__m128i _mm_subs_epi16(__m128i a, __m128i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_subs_epi8",
        "full_name": "__m128i _mm_subs_epi8(__m128i a, __m128i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_shuffle_epi32",
        "full_name": "__m128i _mm_shuffle_epi32(__m128i a, const int imm8);",
        "description": "Shuffle 32-bit integers in \"a\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_store_sd",
        "full_name": "void _mm_store_sd(double *mem_addr, __m128d a);",
        "description": "Store the lower double-precision (64-bit) floating-point element from \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_store1_pd",
        "full_name": "void _mm_store1_pd(double *mem_addr, __m128d a);",
        "description": "Store the lower double-precision (64-bit) floating-point element from \"a\" into 2 contiguous elements in memory. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_storeh_pd",
        "full_name": "void _mm_storeh_pd(double *mem_addr, __m128d a);",
        "description": "Store the upper double-precision (64-bit) floating-point element from \"a\" into memory."
    },
    {
        "name": "_mm_storel_epi64",
        "full_name": "void _mm_storel_epi64(__m128i *mem_addr, __m128i a);",
        "description": "Store 64-bit integer from the first element of \"a\" into memory."
    },
    {
        "name": "_mm_storel_pd",
        "full_name": "void _mm_storel_pd(double *mem_addr, __m128d a);",
        "description": "Store the lower double-precision (64-bit) floating-point element from \"a\" into memory."
    },
    {
        "name": "_mm_storer_pd",
        "full_name": "void _mm_storer_pd(double *mem_addr, __m128d a);",
        "description": "Store 2 double-precision (64-bit) floating-point elements from \"a\" into memory in reverse order.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_storeu_pd",
        "full_name": "void _mm_storeu_pd(double *mem_addr, __m128d a);",
        "description": "Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_stream_pd",
        "full_name": "void _mm_stream_pd(double *mem_addr, __m128d a);",
        "description": "Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_stream_si32",
        "full_name": "void _mm_stream_si32(int *mem_addr, int a);",
        "description": "Store 32-bit integer \"a\" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address \"mem_addr\" is already in the cache, the cache will be updated."
    },
    {
        "name": "_mm_stream_si64",
        "full_name": "void _mm_stream_si64(__int64 *mem_addr, __int64 a);",
        "description": "Store 64-bit integer \"a\" into memory using a non-temporal hint to minimize cache pollution. If the cache line containing address \"mem_addr\" is already in the cache, the cache will be updated."
    },
    {
        "name": "_mm_avg_epu16",
        "full_name": "__m128i _mm_avg_epu16(__m128i a, __m128i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_avg_epu8",
        "full_name": "__m128i _mm_avg_epu8(__m128i a, __m128i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_shufflehi_epi16",
        "full_name": "__m128i _mm_shufflehi_epi16(__m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of \"dst\", with the low 64 bits being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm_shufflelo_epi16",
        "full_name": "__m128i _mm_shufflelo_epi16(__m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of \"dst\", with the high 64 bits being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm_load_pd1",
        "full_name": "__m128d _mm_load_pd1(double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into both elements of \"dst\"."
    },
    {
        "name": "_mm_load_sd",
        "full_name": "__m128d _mm_load_sd(double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into the lower element of \"dst\", and zero the upper element. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_load1_pd",
        "full_name": "__m128d _mm_load1_pd(double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into both elements of \"dst\"."
    },
    {
        "name": "_mm_loadl_epi64",
        "full_name": "__m128i _mm_loadl_epi64(__m128i const * mem_addr);",
        "description": "Load 64-bit integer from memory into the first element of \"dst\"."
    },
    {
        "name": "_mm_loadl_pd",
        "full_name": "__m128d _mm_loadl_pd(__m128d a, double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into the lower element of \"dst\", and copy the upper element from \"a\" to \"dst\". \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_loadr_pd",
        "full_name": "__m128d _mm_loadr_pd(double const * mem_addr);",
        "description": "Load 2 double-precision (64-bit) floating-point elements from memory into \"dst\" in reverse order. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_loadu_pd",
        "full_name": "__m128d _mm_loadu_pd(double const * mem_addr);",
        "description": "Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskmoveu_si128",
        "full_name": "void _mm_maskmoveu_si128(__m128i a, __m128i mask, char *mem_addr);",
        "description": "Conditionally store 8-bit integer elements from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_store_pd1",
        "full_name": "void _mm_store_pd1(double *mem_addr, __m128d a);",
        "description": "Store the lower double-precision (64-bit) floating-point element from \"a\" into 2 contiguous elements in memory. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_malloc",
        "full_name": "void *_mm_malloc(size_t size, size_t align);",
        "description": "Allocate \"size\" bytes of memory, aligned to the alignment specified in \"align\", and return a pointer to the allocated memory. \"_mm_free\" should be used to free memory that is allocated with \"_mm_malloc\"."
    },
    {
        "name": "_mm_free",
        "full_name": "void _mm_free(void *mem_addr);",
        "description": "Free aligned memory that was allocated with \"_mm_malloc\"."
    },
    {
        "name": "_mm_add_ps",
        "full_name": "__m128 _mm_add_ps(__m128 a, __m128 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_ss",
        "full_name": "__m128 _mm_add_ss(__m128 a, __m128 b);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_and_ps",
        "full_name": "__m128 _mm_and_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_ps",
        "full_name": "__m128 _mm_cmpeq_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_ss",
        "full_name": "__m128 _mm_cmpeq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for equality, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_div_ps",
        "full_name": "__m128 _mm_div_ps(__m128 a, __m128 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_div_ss",
        "full_name": "__m128 _mm_div_ss(__m128 a, __m128 b);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_movehl_ps",
        "full_name": "__m128 _mm_movehl_ps(__m128 a, __m128 b);",
        "description": "Move the upper 2 single-precision (32-bit) floating-point elements from \"b\" to the lower 2 elements of \"dst\", and copy the upper 2 elements from \"a\" to the upper 2 elements of \"dst\"."
    },
    {
        "name": "_mm_movelh_ps",
        "full_name": "__m128 _mm_movelh_ps(__m128 a, __m128 b);",
        "description": "Move the lower 2 single-precision (32-bit) floating-point elements from \"b\" to the upper 2 elements of \"dst\", and copy the lower 2 elements from \"a\" to the lower 2 elements of \"dst\"."
    },
    {
        "name": "_mm_mul_ps",
        "full_name": "__m128 _mm_mul_ps(__m128 a, __m128 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mul_ss",
        "full_name": "__m128 _mm_mul_ss(__m128 a, __m128 b);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_or_ps",
        "full_name": "__m128 _mm_or_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_setr_ps",
        "full_name": "__m128 _mm_setr_ps(float e3, float e2, float e1, float e0);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_sub_ps",
        "full_name": "__m128 _mm_sub_ps(__m128 a, __m128 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_ss",
        "full_name": "__m128 _mm_sub_ss(__m128 a, __m128 b);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_xor_ps",
        "full_name": "__m128 _mm_xor_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvt_si2ss",
        "full_name": "__m128 _mm_cvt_si2ss(__m128 a, int b);",
        "description": "Convert the signed 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvt_ss2si",
        "full_name": "int _mm_cvt_ss2si(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtss_f32",
        "full_name": "float _mm_cvtss_f32(__m128 a);",
        "description": "Copy the lower single-precision (32-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvt_pi2ps",
        "full_name": "__m128 _mm_cvt_pi2ps(__m128 a, __m64 b);",
        "description": "Convert packed signed 32-bit integers in \"b\" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of \"dst\", and copy the upper 2 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvt_ps2pi",
        "full_name": "__m64 _mm_cvt_ps2pi(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_insert_pi16",
        "full_name": "__m64 _mm_insert_pi16(__m64 a, int i, const int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the 16-bit integer \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_cmpge_ps",
        "full_name": "__m128 _mm_cmpge_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for greater-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpge_ss",
        "full_name": "__m128 _mm_cmpge_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for greater-than-or-equal, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_comieq_ss",
        "full_name": "int _mm_comieq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for equality, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_ucomieq_ss",
        "full_name": "int _mm_ucomieq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomige_ss",
        "full_name": "int _mm_ucomige_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomigt_ss",
        "full_name": "int _mm_ucomigt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomile_ss",
        "full_name": "int _mm_ucomile_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomilt_ss",
        "full_name": "int _mm_ucomilt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_ucomineq_ss",
        "full_name": "int _mm_ucomineq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs."
    },
    {
        "name": "_mm_comige_ss",
        "full_name": "int _mm_comige_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for greater-than-or-equal, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comigt_ss",
        "full_name": "int _mm_comigt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for greater-than, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comile_ss",
        "full_name": "int _mm_comile_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for less-than-or-equal, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comilt_ss",
        "full_name": "int _mm_comilt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for less-than, and return the boolean result (0 or 1)."
    },
    {
        "name": "_mm_comineq_ss",
        "full_name": "int _mm_comineq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" for not-equal, and return the boolean result (0 or 1)."
    },
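    {
        "name": "_mm_max_ps",
        "full_name": "__m128 _mm_max_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values."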
    },
    {
        "name": "_mm_max_ss",
        "full_name": "__m128 _mm_max_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values."
    },
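    {
        "name": "_mm_min_ps",
        "full_name": "__m128 _mm_min_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values."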
    },
    {
        "name": "_mm_min_ss",
        "full_name": "__m128 _mm_min_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"dst\" does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values."
    },
    {
        "name": "_mm_max_pi16",
        "full_name": "__m64 _mm_max_pi16(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_max_pu8",
        "full_name": "__m64 _mm_max_pu8(__m64 a, __m64 b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_min_pi16",
        "full_name": "__m64 _mm_min_pi16(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_min_pu8",
        "full_name": "__m64 _mm_min_pu8(__m64 a, __m64 b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_movemask_pi8",
        "full_name": "int _mm_movemask_pi8(__m64 a);",
        "description": "Create mask from the most significant bit of each 8-bit element in \"a\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_mulhi_pu16",
        "full_name": "__m64 _mm_mulhi_pu16(__m64 a, __m64 b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_ps",
        "full_name": "__m128 _mm_cmpgt_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_ss",
        "full_name": "__m128 _mm_cmpgt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for greater-than, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmple_ps",
        "full_name": "__m128 _mm_cmple_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmple_ss",
        "full_name": "__m128 _mm_cmple_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_m_pinsrw",
        "full_name": "__m64 _m_pinsrw(__m64 a, int i, int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the 16-bit integer \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_m_pmaxsw",
        "full_name": "__m64 _m_pmaxsw(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_m_pmaxub",
        "full_name": "__m64 _m_pmaxub(__m64 a, __m64 b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_m_pminsw",
        "full_name": "__m64 _m_pminsw(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_m_pminub",
        "full_name": "__m64 _m_pminub(__m64 a, __m64 b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_m_pmovmskb",
        "full_name": "int _m_pmovmskb(__m64 a);",
        "description": "Create mask from the most significant bit of each 8-bit element in \"a\", and store the result in \"dst\"."
    },
    {
        "name": "_m_pmulhuw",
        "full_name": "__m64 _m_pmulhuw(__m64 a, __m64 b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_m_pmulhw",
        "full_name": "__m64 _m_pmulhw(__m64 a, __m64 b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_m_pmullw",
        "full_name": "__m64 _m_pmullw(__m64 a, __m64 b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_set_ps",
        "full_name": "__m128 _mm_set_ps(float e3, float e2, float e1, float e0);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set_ps1",
        "full_name": "__m128 _mm_set_ps1(float a);",
        "description": "Broadcast single-precision (32-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set_ss",
        "full_name": "__m128 _mm_set_ss(float a);",
        "description": "Copy single-precision (32-bit) floating-point element \"a\" to the lower element of \"dst\", and zero the upper 3 elements."
    },
    {
        "name": "_mm_setzero_ps",
        "full_name": "__m128 _mm_setzero_ps(void);",
        "description": "Return vector of type __m128 with all elements set to zero."
    },
    {
        "name": "_mm_cmpnlt_ss",
        "full_name": "__m128 _mm_cmpnlt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtpi16_ps",
        "full_name": "__m128 _mm_cvtpi16_ps(__m64 a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtpi32_ps",
        "full_name": "__m128 _mm_cvtpi32_ps(__m128 a, __m64 b);",
        "description": "Convert packed 32-bit integers in \"b\" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of \"dst\", and copy the upper 2 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtpi32x2_ps",
        "full_name": "__m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, store the results in the lower 2 elements of \"dst\", then convert the packed signed 32-bit integers in \"b\" to packed single-precision (32-bit) floating-point elements, and store the results in the upper 2 elements of \"dst\"."
    },
    {
        "name": "_mm_cvtpi8_ps",
        "full_name": "__m128 _mm_cvtpi8_ps(__m64 a);",
        "description": "Convert the lower packed 8-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtt_ss2si",
        "full_name": "int _mm_cvtt_ss2si(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cmplt_ss",
        "full_name": "__m128 _mm_cmplt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpnge_ss",
        "full_name": "__m128 _mm_cmpnge_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-greater-than-or-equal, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpngt_ps",
        "full_name": "__m128 _mm_cmpngt_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpngt_ss",
        "full_name": "__m128 _mm_cmpngt_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-greater-than, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpnle_ps",
        "full_name": "__m128 _mm_cmpnle_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpnle_ss",
        "full_name": "__m128 _mm_cmpnle_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpnlt_ps",
        "full_name": "__m128 _mm_cmpnlt_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpnge_ps",
        "full_name": "__m128 _mm_cmpnge_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-greater-than-or-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpneq_ss",
        "full_name": "__m128 _mm_cmpneq_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-equal, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpneq_ps",
        "full_name": "__m128 _mm_cmpneq_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_ps",
        "full_name": "__m128 _mm_unpackhi_ps(__m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsi32_ss",
        "full_name": "__m128 _mm_cvtsi32_ss(__m128 a, int b);",
        "description": "Convert the signed 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtsi64_ss",
        "full_name": "__m128 _mm_cvtsi64_ss(__m128 a, __int64 b);",
        "description": "Convert the signed 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_store_ps1",
        "full_name": "void _mm_store_ps1(float *mem_addr, __m128 a);",
        "description": "Store the lower single-precision (32-bit) floating-point element from \"a\" into 4 contiguous elements in memory. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_store_ss",
        "full_name": "void _mm_store_ss(float *mem_addr, __m128 a);",
        "description": "Store the lower single-precision (32-bit) floating-point element from \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_store1_ps",
        "full_name": "void _mm_store1_ps(float *mem_addr, __m128 a);",
        "description": "Store the lower single-precision (32-bit) floating-point element from \"a\" into 4 contiguous elements in memory. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_storeh_pi",
        "full_name": "void _mm_storeh_pi(__m64 *mem_addr, __m128 a);",
        "description": "Store the upper 2 single-precision (32-bit) floating-point elements from \"a\" into memory."
    },
    {
        "name": "_mm_storel_pi",
        "full_name": "void _mm_storel_pi(__m64 *mem_addr, __m128 a);",
        "description": "Store the lower 2 single-precision (32-bit) floating-point elements from \"a\" into memory."
    },
    {
        "name": "_mm_storer_ps",
        "full_name": "void _mm_storer_ps(float *mem_addr, __m128 a);",
        "description": "Store 4 single-precision (32-bit) floating-point elements from \"a\" into memory in reverse order.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_storeu_ps",
        "full_name": "void _mm_storeu_ps(float *mem_addr, __m128 a);",
        "description": "Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_stream_pi",
        "full_name": "void _mm_stream_pi(__m64 *mem_addr, __m64 a);",
        "description": "Store 64-bits of integer data from \"a\" into memory using a non-temporal memory hint."
    },
    {
        "name": "_mm_stream_ps",
        "full_name": "void _mm_stream_ps(float *mem_addr, __m128 a);",
        "description": "Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_avg_pu16",
        "full_name": "__m64 _mm_avg_pu16(__m64 a, __m64 b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_avg_pu8",
        "full_name": "__m64 _mm_avg_pu8(__m64 a, __m64 b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_extract_pi16",
        "full_name": "int _mm_extract_pi16(__m64 a, int imm8);",
        "description": "Extract a 16-bit integer from \"a\", selected with \"imm8\", and store the result in the lower element of \"dst\"."
    },
    {
        "name": "_m_pavgb",
        "full_name": "__m64 _m_pavgb(__m64 a, __m64 b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_pavgw",
        "full_name": "__m64 _m_pavgw(__m64 a, __m64 b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_pextrw",
        "full_name": "int _m_pextrw(__m64 a, int imm8);",
        "description": "Extract a 16-bit integer from \"a\", selected with \"imm8\", and store the result in the lower element of \"dst\"."
    },
    {
        "name": "_mm_load_ps1",
        "full_name": "__m128 _mm_load_ps1(float const * mem_addr);",
        "description": "Load a single-precision (32-bit) floating-point element from memory into all elements of \"dst\"."
    },
    {
        "name": "_mm_load_ss",
        "full_name": "__m128 _mm_load_ss(float const * mem_addr);",
        "description": "Load a single-precision (32-bit) floating-point element from memory into the lower element of \"dst\", and zero the upper 3 elements. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_load1_ps",
        "full_name": "__m128 _mm_load1_ps(float const * mem_addr);",
        "description": "Load a single-precision (32-bit) floating-point element from memory into all elements of \"dst\"."
    },
    {
        "name": "_mm_loadl_pi",
        "full_name": "__m128 _mm_loadl_pi(__m128 a, __m64 const * mem_addr);",
        "description": "Load 2 single-precision (32-bit) floating-point elements from memory into the lower 2 elements of \"dst\", and copy the upper 2 elements from \"a\" to \"dst\". \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_loadr_ps",
        "full_name": "__m128 _mm_loadr_ps(float const * mem_addr);",
        "description": "Load 4 single-precision (32-bit) floating-point elements from memory into \"dst\" in reverse order. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_loadu_ps",
        "full_name": "__m128 _mm_loadu_ps(float const * mem_addr);",
        "description": "Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskmove_si64",
        "full_name": "void _mm_maskmove_si64(__m64 a, __m64 mask, char *mem_addr);",
        "description": "Conditionally store 8-bit integer elements from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element) and a non-temporal memory hint."
    },
    {
        "name": "_m_maskmovq",
        "full_name": "void _m_maskmovq(__m64 a, __m64 mask, char *mem_addr);",
        "description": "Conditionally store 8-bit integer elements from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm_loadh_pi",
        "full_name": "__m128 _mm_loadh_pi(__m128 a, __m64 const * mem_addr);",
        "description": "Load 2 single-precision (32-bit) floating-point elements from memory into the upper 2 elements of \"dst\", and copy the lower 2 elements from \"a\" to \"dst\". \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_MM_GET_EXCEPTION_MASK",
        "full_name": "unsigned int _MM_GET_EXCEPTION_MASK();",
        "description": "Macro: Get the exception mask bits from the MXCSR control and status register. The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT"
    },
    {
        "name": "_MM_GET_EXCEPTION_STATE",
        "full_name": "unsigned int _MM_GET_EXCEPTION_STATE();",
        "description": "Macro: Get the exception state bits from the MXCSR control and status register. The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT"
    },
    {
        "name": "_MM_GET_FLUSH_ZERO_MODE",
        "full_name": "unsigned int _MM_GET_FLUSH_ZERO_MODE();",
        "description": "Macro: Get the flush zero bits from the MXCSR control and status register. The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF"
    },
    {
        "name": "_MM_GET_ROUNDING_MODE",
        "full_name": "unsigned int _MM_GET_ROUNDING_MODE();",
        "description": "Macro: Get the rounding mode bits from the MXCSR control and status register. The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO"
    },
    {
        "name": "_MM_SET_EXCEPTION_MASK",
        "full_name": "void _MM_SET_EXCEPTION_MASK(unsigned int a);",
        "description": "Macro: Set the exception mask bits of the MXCSR control and status register to the value in unsigned 32-bit integer \"a\". The exception mask may contain any of the following flags: _MM_MASK_INVALID, _MM_MASK_DIV_ZERO, _MM_MASK_DENORM, _MM_MASK_OVERFLOW, _MM_MASK_UNDERFLOW, _MM_MASK_INEXACT"
    },
    {
        "name": "_MM_SET_EXCEPTION_STATE",
        "full_name": "void _MM_SET_EXCEPTION_STATE(unsigned int a);",
        "description": "Macro: Set the exception state bits of the MXCSR control and status register to the value in unsigned 32-bit integer \"a\". The exception state may contain any of the following flags: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, _MM_EXCEPT_INEXACT"
    },
    {
        "name": "_MM_SET_FLUSH_ZERO_MODE",
        "full_name": "void _MM_SET_FLUSH_ZERO_MODE(unsigned int a);",
        "description": "Macro: Set the flush zero bits of the MXCSR control and status register to the value in unsigned 32-bit integer \"a\". The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF"
    },
    {
        "name": "_MM_SET_ROUNDING_MODE",
        "full_name": "void _MM_SET_ROUNDING_MODE(unsigned int a);",
        "description": "Macro: Set the rounding mode bits of the MXCSR control and status register to the value in unsigned 32-bit integer \"a\". The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO"
    },
    {
        "name": "_mm_rcp_ps",
        "full_name": "__m128 _mm_rcp_ps(__m128 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 1.5*2^-12."
    },
    {
        "name": "_mm_prefetch",
        "full_name": "void _mm_prefetch(char const * p, int i);",
        "description": "Fetch the line of data from memory that contains address \"p\" to a location in the cache hierarchy specified by the locality hint \"i\", which can be one of:<ul>\n    <li>_MM_HINT_T0   // 3, move data using the T0 hint. The PREFETCHT0 instruction will be generated.</li>\n    <li>_MM_HINT_T1   // 2, move data using the T1 hint. The PREFETCHT1 instruction will be generated.</li>\n    <li>_MM_HINT_T2   // 1, move data using the T2 hint. The PREFETCHT2 instruction will be generated.</li>\n    <li>_MM_HINT_NTA  // 0, move data using the non-temporal access (NTA) hint. The PREFETCHNTA instruction will be generated.</li>\n</ul>"
    },
    {
        "name": "_mm_cvtepi16_epi32",
        "full_name": "__m128i _mm_cvtepi16_epi32(__m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu8_epi16",
        "full_name": "__m128i _mm_cvtepu8_epi16(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_ceil_pd",
        "full_name": "__m128d _mm_ceil_pd(__m128d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm_ceil_ps",
        "full_name": "__m128 _mm_ceil_ps(__m128 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm_ceil_sd",
        "full_name": "__m128d _mm_ceil_sd(__m128d a, __m128d b);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" up to an integer value, store the result as a double-precision floating-point element in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_ceil_ss",
        "full_name": "__m128 _mm_ceil_ss(__m128 a, __m128 b);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" up to an integer value, store the result as a single-precision floating-point element in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtepu8_epi32",
        "full_name": "__m128i _mm_cvtepu8_epi32(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu8_epi64",
        "full_name": "__m128i _mm_cvtepu8_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepi16_epi64",
        "full_name": "__m128i _mm_cvtepi16_epi64(__m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_insert_epi32",
        "full_name": "__m128i _mm_insert_epi32(__m128i a, int i, const int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the 32-bit integer \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_insert_epi64",
        "full_name": "__m128i _mm_insert_epi64(__m128i a, __int64 i, const int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the 64-bit integer \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_insert_ps",
        "full_name": "__m128 _mm_insert_ps(__m128 a, __m128 b, const int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert a single-precision (32-bit) floating-point element from \"b\" into \"tmp\" using the control in \"imm8\". Store \"tmp\" to \"dst\" using the mask in \"imm8\" (elements are zeroed out when the corresponding bit is set)."
    },
    {
        "name": "_mm_blendv_epi8",
        "full_name": "__m128i _mm_blendv_epi8(__m128i a, __m128i b, __m128i mask);",
        "description": "Blend packed 8-bit integers from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_blendv_pd",
        "full_name": "__m128d _mm_blendv_pd(__m128d a, __m128d b, __m128d mask);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_blendv_ps",
        "full_name": "__m128 _mm_blendv_ps(__m128 a, __m128 b, __m128 mask);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_max_epi32",
        "full_name": "__m128i _mm_max_epi32(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_max_epi8",
        "full_name": "__m128i _mm_max_epi8(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_max_epu16",
        "full_name": "__m128i _mm_max_epu16(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_min_epi32",
        "full_name": "__m128i _mm_min_epi32(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_min_epi8",
        "full_name": "__m128i _mm_min_epi8(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_min_epu16",
        "full_name": "__m128i _mm_min_epu16(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mullo_epi32",
        "full_name": "__m128i _mm_mullo_epi32(__m128i a, __m128i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_floor_pd",
        "full_name": "__m128d _mm_floor_pd(__m128d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm_floor_ps",
        "full_name": "__m128 _mm_floor_ps(__m128 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm_floor_sd",
        "full_name": "__m128d _mm_floor_sd(__m128d a, __m128d b);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" down to an integer value, store the result as a double-precision floating-point element in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_floor_ss",
        "full_name": "__m128 _mm_floor_ss(__m128 a, __m128 b);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" down to an integer value, store the result as a single-precision floating-point element in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_max_epu32",
        "full_name": "__m128i _mm_max_epu32(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_min_epu32",
        "full_name": "__m128i _mm_min_epu32(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_minpos_epu16",
        "full_name": "__m128i _mm_minpos_epu16(__m128i a);",
        "description": "Horizontally compute the minimum amongst the packed unsigned 16-bit integers in \"a\", store the minimum and index in \"dst\", and zero the remaining bits in \"dst\"."
    },
    {
        "name": "_mm_mul_epi32",
        "full_name": "__m128i _mm_mul_epi32(__m128i a, __m128i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\"."
    },
    {
        "name": "_mm_blend_epi16",
        "full_name": "__m128i _mm_blend_epi16(__m128i a, __m128i b, const int imm8);",
        "description": "Blend packed 16-bit integers from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_blend_pd",
        "full_name": "__m128d _mm_blend_pd(__m128d a, __m128d b, const int imm8);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_blend_ps",
        "full_name": "__m128 _mm_blend_ps(__m128 a, __m128 b, const int imm8);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu32_epi64",
        "full_name": "__m128i _mm_cvtepu32_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_round_pd",
        "full_name": "__m128d _mm_round_pd(__m128d a, int rounding);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" using the \"rounding\" parameter, and store the results as packed double-precision floating-point elements in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm256_round_pd",
        "full_name": "__m256d _mm256_round_pd(__m256d a, int rounding);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" using the \"rounding\" parameter, and store the results as packed double-precision floating-point elements in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_round_ps",
        "full_name": "__m128 _mm_round_ps(__m128 a, int rounding);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" using the \"rounding\" parameter, and store the results as packed single-precision floating-point elements in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm256_round_ps",
        "full_name": "__m256 _mm256_round_ps(__m256 a, int rounding);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" using the \"rounding\" parameter, and store the results as packed single-precision floating-point elements in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_round_sd",
        "full_name": "__m128d _mm_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" using the \"rounding\" parameter, store the result as a double-precision floating-point element in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_round_ss",
        "full_name": "__m128 _mm_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" using the \"rounding\" parameter, store the result as a single-precision floating-point element in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvtepi8_epi64",
        "full_name": "__m128i _mm_cvtepi8_epi64(__m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepi8_epi32",
        "full_name": "__m128i _mm_cvtepi8_epi32(__m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu16_epi32",
        "full_name": "__m128i _mm_cvtepu16_epi32(__m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu16_epi64",
        "full_name": "__m128i _mm_cvtepu16_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_extract_epi8",
        "full_name": "int _mm_extract_epi8(__m128i a, const int imm8);",
        "description": "Extract an 8-bit integer from \"a\", selected with \"imm8\", and store the result in the lower element of \"dst\"."
    },
    {
        "name": "_mm_extract_epi32",
        "full_name": "int _mm_extract_epi32(__m128i a, const int imm8);",
        "description": "Extract a 32-bit integer from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_extract_epi64",
        "full_name": "__int64 _mm_extract_epi64(__m128i a, const int imm8);",
        "description": "Extract a 64-bit integer from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_stream_load_si128",
        "full_name": "__m128i _mm_stream_load_si128(__m128i *mem_addr);",
        "description": "Load 128-bits of integer data from memory into \"dst\" using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_test_all_ones",
        "full_name": "int _mm_test_all_ones(__m128i a);",
        "description": "Compute the bitwise NOT of \"a\" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0."
    },
    {
        "name": "_mm_test_all_zeros",
        "full_name": "int _mm_test_all_zeros(__m128i a, __m128i mask);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"mask\", and return 1 if the result is zero, otherwise return 0."
    },
    {
        "name": "_mm_test_mix_ones_zeros",
        "full_name": "int _mm_test_mix_ones_zeros(__m128i a, __m128i mask);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"mask\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"mask\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm_mpsadbw_epu8",
        "full_name": "__m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, const int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\".\n\tEight SADs are performed using one quadruplet from \"b\" and eight quadruplets from \"a\". One quadruplet is selected from \"b\" starting at the offset specified in \"imm8\". Eight quadruplets are formed from sequential 8-bit integers selected from \"a\" starting at the offset specified in \"imm8\"."
    },
    {
        "name": "_mm_insert_epi8",
        "full_name": "__m128i _mm_insert_epi8(__m128i a, int i, const int imm8);",
        "description": "Copy \"a\" to \"dst\", and insert the lower 8-bit integer from \"i\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_cmpgt_epi64",
        "full_name": "__m128i _mm_cmpgt_epi64(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpistrz",
        "full_name": "int _mm_cmpistrz(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and returns 1 if any character in \"b\" was null, and 0 otherwise.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpistrm",
        "full_name": "__m128i _mm_cmpistrm(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and store the generated mask in \"dst\".\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpistro",
        "full_name": "int _mm_cmpistro(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and returns bit 0 of the resulting bit mask.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpistri",
        "full_name": "int _mm_cmpistri(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and store the generated index in \"dst\".\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpistrc",
        "full_name": "int _mm_cmpistrc(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and returns 1 if the resulting mask was non-zero, and 0 otherwise.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpistra",
        "full_name": "int _mm_cmpistra(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed strings with implicit lengths in \"a\" and \"b\" using the control in \"imm8\", and returns 1 if \"b\" did not contain a null character and the resulting mask was zero, and 0 otherwise.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpestro",
        "full_name": "int _mm_cmpestro(__m128i a, int la, __m128i b, int lb, const int imm8);",
        "description": "Compare packed strings in \"a\" and \"b\" with lengths \"la\" and \"lb\" using the control in \"imm8\", and returns bit 0 of the resulting bit mask.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpestrc",
        "full_name": "int _mm_cmpestrc(__m128i a, int la, __m128i b, int lb, const int imm8);",
        "description": "Compare packed strings in \"a\" and \"b\" with lengths \"la\" and \"lb\" using the control in \"imm8\", and returns 1 if the resulting mask was non-zero, and 0 otherwise.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_cmpestra",
        "full_name": "int _mm_cmpestra(__m128i a, int la, __m128i b, int lb, const int imm8);",
        "description": "Compare packed strings in \"a\" and \"b\" with lengths \"la\" and \"lb\" using the control in \"imm8\", and returns 1 if \"b\" did not contain a null character and the resulting mask was zero, and 0 otherwise.\n\t[strcmp_note]"
    },
    {
        "name": "_mm_addsub_pd",
        "full_name": "__m128d _mm_addsub_pd(__m128d a, __m128d b);",
        "description": "Alternately add and subtract packed double-precision (64-bit) floating-point elements in \"a\" to/from packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_addsub_ps",
        "full_name": "__m128 _mm_addsub_ps(__m128 a, __m128 b);",
        "description": "Alternately add and subtract packed single-precision (32-bit) floating-point elements in \"a\" to/from packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_hadd_pd",
        "full_name": "__m128d _mm_hadd_pd(__m128d a, __m128d b);",
        "description": "Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm_hadd_ps",
        "full_name": "__m128 _mm_hadd_ps(__m128 a, __m128 b);",
        "description": "Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm_hsub_pd",
        "full_name": "__m128d _mm_hsub_pd(__m128d a, __m128d b);",
        "description": "Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm_hsub_ps",
        "full_name": "__m128 _mm_hsub_ps(__m128 a, __m128 b);",
        "description": "Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm_movedup_pd",
        "full_name": "__m128d _mm_movedup_pd(__m128d a);",
        "description": "Duplicate the low double-precision (64-bit) floating-point element from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_movehdup_ps",
        "full_name": "__m128 _mm_movehdup_ps(__m128 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_moveldup_ps",
        "full_name": "__m128 _mm_moveldup_ps(__m128 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_m_to_int",
        "full_name": "int _m_to_int(__m64 a);",
        "description": "Copy the lower 32-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_m_to_int64",
        "full_name": "__int64 _m_to_int64(__m64 a);",
        "description": "Copy 64-bit integer \"a\" to \"dst\"."
    },
    {
        "name": "_mm_sub_pi16",
        "full_name": "__m64 _mm_sub_pi16(__m64 a, __m64 b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_pi32",
        "full_name": "__m64 _mm_sub_pi32(__m64 a, __m64 b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_pi8",
        "full_name": "__m64 _mm_sub_pi8(__m64 a, __m64 b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_sub_si64",
        "full_name": "__m64 _mm_sub_si64(__m64 a, __m64 b);",
        "description": "Subtract 64-bit integer \"b\" from 64-bit integer \"a\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtm64_si64",
        "full_name": "__int64 _mm_cvtm64_si64(__m64 a);",
        "description": "Copy 64-bit integer \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvtsi32_si64",
        "full_name": "__m64 _mm_cvtsi32_si64(int a);",
        "description": "Copy 32-bit integer \"a\" to the lower element of \"dst\", and zero the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtsi64_m64",
        "full_name": "__m64 _mm_cvtsi64_m64(__int64 a);",
        "description": "Copy 64-bit integer \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cvtsi64_si32",
        "full_name": "int _mm_cvtsi64_si32(__m64 a);",
        "description": "Copy the lower 32-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm_cmpeq_pi8",
        "full_name": "__m64 _mm_cmpeq_pi8(__m64 a, __m64 b);",
        "description": "Compare packed 8-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_pi8",
        "full_name": "__m64 _mm_add_pi8(__m64 a, __m64 b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_pi16",
        "full_name": "__m64 _mm_add_pi16(__m64 a, __m64 b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_add_pi32",
        "full_name": "__m64 _mm_add_pi32(__m64 a, __m64 b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_adds_pi16",
        "full_name": "__m64 _mm_adds_pi16(__m64 a, __m64 b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_from_int",
        "full_name": "__m64 _m_from_int(int a);",
        "description": "Copy 32-bit integer \"a\" to the lower element of \"dst\", and zero the upper element of \"dst\"."
    },
    {
        "name": "_m_from_int64",
        "full_name": "__m64 _m_from_int64(__int64 a);",
        "description": "Copy 64-bit integer \"a\" to \"dst\"."
    },
    {
        "name": "_mm_madd_pi16",
        "full_name": "__m64 _mm_madd_pi16(__m64 a, __m64 b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\"."
    },
    {
        "name": "_m_paddb",
        "full_name": "__m64 _m_paddb(__m64 a, __m64 b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_paddd",
        "full_name": "__m64 _m_paddd(__m64 a, __m64 b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_paddw",
        "full_name": "__m64 _m_paddw(__m64 a, __m64 b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_pmaddwd",
        "full_name": "__m64 _m_pmaddwd(__m64 a, __m64 b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\"."
    },
    {
        "name": "_mm_adds_pi8",
        "full_name": "__m64 _mm_adds_pi8(__m64 a, __m64 b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_adds_pu16",
        "full_name": "__m64 _mm_adds_pu16(__m64 a, __m64 b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_adds_pu8",
        "full_name": "__m64 _mm_adds_pu8(__m64 a, __m64 b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_pi16",
        "full_name": "__m64 _mm_cmpeq_pi16(__m64 a, __m64 b);",
        "description": "Compare packed 16-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_pi32",
        "full_name": "__m64 _mm_cmpeq_pi32(__m64 a, __m64 b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mulhi_pi16",
        "full_name": "__m64 _mm_mulhi_pi16(__m64 a, __m64 b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_mullo_pi16",
        "full_name": "__m64 _mm_mullo_pi16(__m64 a, __m64 b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_pi16",
        "full_name": "__m64 _mm_cmpgt_pi16(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_pi32",
        "full_name": "__m64 _mm_cmpgt_pi32(__m64 a, __m64 b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpgt_pi8",
        "full_name": "__m64 _mm_cmpgt_pi8(__m64 a, __m64 b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpeqb",
        "full_name": "__m64 _m_pcmpeqb(__m64 a, __m64 b);",
        "description": "Compare packed 8-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpeqw",
        "full_name": "__m64 _m_pcmpeqw(__m64 a, __m64 b);",
        "description": "Compare packed 16-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpeqd",
        "full_name": "__m64 _m_pcmpeqd(__m64 a, __m64 b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpgtb",
        "full_name": "__m64 _m_pcmpgtb(__m64 a, __m64 b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpgtw",
        "full_name": "__m64 _m_pcmpgtw(__m64 a, __m64 b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_m_pcmpgtd",
        "full_name": "__m64 _m_pcmpgtd(__m64 a, __m64 b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_pdep_u32",
        "full_name": "unsigned int _pdep_u32(unsigned int a, unsigned int mask);",
        "description": "Deposit contiguous low bits from unsigned 32-bit integer \"a\" to \"dst\" at the corresponding bit locations specified by \"mask\"; all other bits in \"dst\" are set to zero."
    },
    {
        "name": "_pdep_u64",
        "full_name": "unsigned __int64 _pdep_u64(unsigned __int64 a, unsigned __int64 mask);",
        "description": "Deposit contiguous low bits from unsigned 64-bit integer \"a\" to \"dst\" at the corresponding bit locations specified by \"mask\"; all other bits in \"dst\" are set to zero."
    },
    {
        "name": "_pext_u32",
        "full_name": "unsigned int _pext_u32(unsigned int a, unsigned int mask);",
        "description": "Extract bits from unsigned 32-bit integer \"a\" at the corresponding bit locations specified by \"mask\" to contiguous low bits in \"dst\"; the remaining upper bits in \"dst\" are set to zero."
    },
    {
        "name": "_pext_u64",
        "full_name": "unsigned __int64 _pext_u64(unsigned __int64 a, unsigned __int64 mask);",
        "description": "Extract bits from unsigned 64-bit integer \"a\" at the corresponding bit locations specified by \"mask\" to contiguous low bits in \"dst\"; the remaining upper bits in \"dst\" are set to zero."
    },
    {
        "name": "_m_psubb",
        "full_name": "__m64 _m_psubb(__m64 a, __m64 b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_m_psubw",
        "full_name": "__m64 _m_psubw(__m64 a, __m64 b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_m_psubd",
        "full_name": "__m64 _m_psubd(__m64 a, __m64 b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_m_punpckhdq",
        "full_name": "__m64 _m_punpckhdq(__m64 a, __m64 b);",
        "description": "Unpack and interleave 32-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_punpckldq",
        "full_name": "__m64 _m_punpckldq(__m64 a, __m64 b);",
        "description": "Unpack and interleave 32-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_set_pi8",
        "full_name": "__m64 _mm_set_pi8(char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0);",
        "description": "Set packed 8-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set_pi16",
        "full_name": "__m64 _mm_set_pi16(short e3, short e2, short e1, short e0);",
        "description": "Set packed 16-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set_pi32",
        "full_name": "__m64 _mm_set_pi32(int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm_set1_pi8",
        "full_name": "__m64 _mm_set1_pi8(char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set1_pi16",
        "full_name": "__m64 _mm_set1_pi16(short a);",
        "description": "Broadcast 16-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_set1_pi32",
        "full_name": "__m64 _mm_set1_pi32(int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_setr_pi16",
        "full_name": "__m64 _mm_setr_pi16(short e3, short e2, short e1, short e0);",
        "description": "Set packed 16-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setr_pi32",
        "full_name": "__m64 _mm_setr_pi32(int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setr_pi8",
        "full_name": "__m64 _mm_setr_pi8(char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0);",
        "description": "Set packed 8-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm_setzero_si64",
        "full_name": "__m64 _mm_setzero_si64(void);",
        "description": "Return vector of type __m64 with all elements set to zero."
    },
    {
        "name": "_mm_xor_si64",
        "full_name": "__m64 _mm_xor_si64(__m64 a, __m64 b);",
        "description": "Compute the bitwise XOR of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_srai_pi16",
        "full_name": "__m64 _mm_srai_pi16(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srai_pi32",
        "full_name": "__m64 _mm_srai_pi32(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srl_pi16",
        "full_name": "__m64 _mm_srl_pi16(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srl_pi32",
        "full_name": "__m64 _mm_srl_pi32(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srl_si64",
        "full_name": "__m64 _mm_srl_si64(__m64 a, __m64 count);",
        "description": "Shift 64-bit integer \"a\" right by \"count\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_mm_srli_pi16",
        "full_name": "__m64 _mm_srli_pi16(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srli_pi32",
        "full_name": "__m64 _mm_srli_pi32(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srli_si64",
        "full_name": "__m64 _mm_srli_si64(__m64 a, int imm8);",
        "description": "Shift 64-bit integer \"a\" right by \"imm8\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_mm_sll_pi16",
        "full_name": "__m64 _mm_sll_pi16(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sll_pi32",
        "full_name": "__m64 _mm_sll_pi32(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sll_si64",
        "full_name": "__m64 _mm_sll_si64(__m64 a, __m64 count);",
        "description": "Shift 64-bit integer \"a\" left by \"count\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_mm_slli_pi16",
        "full_name": "__m64 _mm_slli_pi16(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_slli_pi32",
        "full_name": "__m64 _mm_slli_pi32(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_slli_si64",
        "full_name": "__m64 _mm_slli_si64(__m64 a, int imm8);",
        "description": "Shift 64-bit integer \"a\" left by \"imm8\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_mm_sra_pi16",
        "full_name": "__m64 _mm_sra_pi16(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sra_pi32",
        "full_name": "__m64 _mm_sra_pi32(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_m_pslld",
        "full_name": "__m64 _m_pslld(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_pslldi",
        "full_name": "__m64 _m_pslldi(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psllq",
        "full_name": "__m64 _m_psllq(__m64 a, __m64 count);",
        "description": "Shift 64-bit integer \"a\" left by \"count\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_m_psllqi",
        "full_name": "__m64 _m_psllqi(__m64 a, int imm8);",
        "description": "Shift 64-bit integer \"a\" left by \"imm8\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_m_psllw",
        "full_name": "__m64 _m_psllw(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psllwi",
        "full_name": "__m64 _m_psllwi(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrad",
        "full_name": "__m64 _m_psrad(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_m_psradi",
        "full_name": "__m64 _m_psradi(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_m_psraw",
        "full_name": "__m64 _m_psraw(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrawi",
        "full_name": "__m64 _m_psrawi(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrld",
        "full_name": "__m64 _m_psrld(__m64 a, __m64 count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrldi",
        "full_name": "__m64 _m_psrldi(__m64 a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrlq",
        "full_name": "__m64 _m_psrlq(__m64 a, __m64 count);",
        "description": "Shift 64-bit integer \"a\" right by \"count\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_m_psrlqi",
        "full_name": "__m64 _m_psrlqi(__m64 a, int imm8);",
        "description": "Shift 64-bit integer \"a\" right by \"imm8\" while shifting in zeros, and store the result in \"dst\"."
    },
    {
        "name": "_m_psrlw",
        "full_name": "__m64 _m_psrlw(__m64 a, __m64 count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_m_psrlwi",
        "full_name": "__m64 _m_psrlwi(__m64 a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_pi8",
        "full_name": "__m64 _mm_unpacklo_pi8(__m64 a, __m64 b);",
        "description": "Unpack and interleave 8-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_pi16",
        "full_name": "__m64 _mm_unpacklo_pi16(__m64 a, __m64 b);",
        "description": "Unpack and interleave 16-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_pi8",
        "full_name": "__m64 _mm_unpackhi_pi8(__m64 a, __m64 b);",
        "description": "Unpack and interleave 8-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_pi16",
        "full_name": "__m64 _mm_unpackhi_pi16(__m64 a, __m64 b);",
        "description": "Unpack and interleave 16-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_psubsb",
        "full_name": "__m64 _m_psubsb(__m64 a, __m64 b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_psubsw",
        "full_name": "__m64 _m_psubsw(__m64 a, __m64 b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_load_si256",
        "full_name": "__m256i _mm256_load_si256(__m256i const * mem_addr);",
        "description": "Load 256-bits of integer data from memory into \"dst\".\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_loadu_si256",
        "full_name": "__m256i _mm256_loadu_si256(__m256i const * mem_addr);",
        "description": "Load 256-bits of integer data from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_load_si512",
        "full_name": "__m512i _mm512_load_si512(void const * mem_addr);",
        "description": "Load 512-bits of integer data from memory into \"dst\".\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_loadu_si512",
        "full_name": "__m512i _mm512_loadu_si512(void const * mem_addr);",
        "description": "Load 512-bits of integer data from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_setzero_si256",
        "full_name": "__m256i _mm256_setzero_si256(void);",
        "description": "Return vector of type __m256i with all elements set to zero."
    },
    {
        "name": "_mm_sllv_epi32",
        "full_name": "__m128i _mm_sllv_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srlv_epi32",
        "full_name": "__m128i _mm_srlv_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_slli_epi32",
        "full_name": "__m256i _mm256_slli_epi32(__m256i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_srli_epi32",
        "full_name": "__m256i _mm256_srli_epi32(__m256i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_slli_epi64",
        "full_name": "__m256i _mm256_slli_epi64(__m256i a, int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_srlv_epi32",
        "full_name": "__m256i _mm256_srlv_epi32(__m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sllv_epi32",
        "full_name": "__m256i _mm256_sllv_epi32(__m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_slli_epi32",
        "full_name": "__m512i _mm512_slli_epi32(__m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srli_epi32",
        "full_name": "__m512i _mm512_srli_epi32(__m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srlv_epi32",
        "full_name": "__m512i _mm512_srlv_epi32(__m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sllv_epi32",
        "full_name": "__m512i _mm512_sllv_epi32(__m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_rolv_epi32",
        "full_name": "__m128i _mm_rolv_epi32(__m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_rorv_epi32",
        "full_name": "__m128i _mm_rorv_epi32(__m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_rolv_epi32",
        "full_name": "__m256i _mm256_rolv_epi32(__m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_rorv_epi32",
        "full_name": "__m256i _mm256_rorv_epi32(__m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_rolv_epi32",
        "full_name": "__m512i _mm512_rolv_epi32(__m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_rorv_epi32",
        "full_name": "__m512i _mm512_rorv_epi32(__m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_epi32",
        "full_name": "__m128i _mm_unpackhi_epi32(__m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_unpackhi_epi32",
        "full_name": "__m256i _mm256_unpackhi_epi32(__m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_unpackhi_epi32",
        "full_name": "__m512i _mm512_unpackhi_epi32(__m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_alignr_epi32",
        "full_name": "__m128i _mm_alignr_epi32(__m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte intermediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 16 bytes (4 elements) in \"dst\"."
    },
    {
        "name": "_mm_alignr_epi64",
        "full_name": "__m128i _mm_alignr_epi64(__m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 16 bytes (2 elements) in \"dst\"."
    },
    {
        "name": "_mm_ternarylogic_epi32",
        "full_name": "__m128i _mm_ternarylogic_epi32(__m128i a, __m128i b, __m128i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bits from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm256_ternarylogic_epi32",
        "full_name": "__m256i _mm256_ternarylogic_epi32(__m256i a, __m256i b, __m256i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bits from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm512_ternarylogic_epi32",
        "full_name": "__m512i _mm512_ternarylogic_epi32(__m512i a, __m512i b, __m512i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bits from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm_i32gather_epi32",
        "full_name": "__m128i _mm_i32gather_epi32(int const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32gather_epi32",
        "full_name": "__m256i _mm256_i32gather_epi32(int const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32gather_epi32",
        "full_name": "__m512i _mm512_i32gather_epi32(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i32gather_epi32",
        "full_name": "__m128i _mm_mmask_i32gather_epi32(__m128i src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i32gather_epi32",
        "full_name": "__m256i _mm256_mmask_i32gather_epi32(__m256i src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32gather_epi32",
        "full_name": "__m512i _mm512_mask_i32gather_epi32(__m512i src, __mmask16 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32gather_epi32",
        "full_name": "__m128i _mm_mask_i32gather_epi32(__m128i src, int const * base_addr, __m128i vindex, __m128i mask, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32gather_epi32",
        "full_name": "__m256i _mm256_mask_i32gather_epi32(__m256i src, int const * base_addr, __m256i vindex, __m256i mask, const int scale);",
        "description": "Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32scatter_epi32",
        "full_name": "void _mm_mask_i32scatter_epi32(void *base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32scatter_epi32",
        "full_name": "void _mm256_mask_i32scatter_epi32(void *base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32scatter_epi32",
        "full_name": "void _mm512_mask_i32scatter_epi32(void *base_addr, __mmask16 k, __m512i vindex, __m512i a, int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_store_si256",
        "full_name": "void _mm256_store_si256(__m256i *mem_addr, __m256i a);",
        "description": "Store 256-bits of integer data from \"a\" into memory. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_storeu_si256",
        "full_name": "void _mm256_storeu_si256(__m256i *mem_addr, __m256i a);",
        "description": "Store 256-bits of integer data from \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_stream_si256",
        "full_name": "void _mm256_stream_si256(__m256i *mem_addr, __m256i a);",
        "description": "Store 256-bits of integer data from \"a\" into memory using a non-temporal memory hint. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_store_si512",
        "full_name": "void _mm512_store_si512(void *mem_addr, __m512i a);",
        "description": "Store 512-bits of integer data from \"a\" into memory. \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_storeu_si512",
        "full_name": "void _mm512_storeu_si512(void *mem_addr, __m512i a);",
        "description": "Store 512-bits of integer data from \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_stream_si512",
        "full_name": "void _mm512_stream_si512(void *mem_addr, __m512i a);",
        "description": "Store 512-bits of integer data from \"a\" into memory using a non-temporal memory hint. \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_loadu_epi32",
        "full_name": "__m128i _mm_loadu_epi32(void const * mem_addr);",
        "description": "Load 128-bits (composed of 4 packed 32-bit integers) from memory into \"dst\". \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_setzero_pd",
        "full_name": "__m256d _mm256_setzero_pd(void);",
        "description": "Return vector of type __m256d with all elements set to zero."
    },
    {
        "name": "_mm256_sll_epi32",
        "full_name": "__m256i _mm256_sll_epi32(__m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_setzero_ps",
        "full_name": "__m256 _mm256_setzero_ps(void);",
        "description": "Return vector of type __m256 with all elements set to zero."
    },
    {
        "name": "_mm256_cvtepi32_pd",
        "full_name": "__m256d _mm256_cvtepi32_pd(__m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_set_epi32",
        "full_name": "__m256i _mm256_set_epi32(int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_set_epi64x",
        "full_name": "__m256i _mm256_set_epi64x(int64_t e3, int64_t e2, int64_t e1, int64_t e0);",
        "description": "Set packed 64-bit integers in \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_mulhrs_epi16",
        "full_name": "__m256i _mm256_mulhrs_epi16(__m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\"."
    },
    {
        "name": "_mm256_mullo_epi64",
        "full_name": "__m256i _mm256_mullo_epi64(__m256i a, __m256i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm256_mulhi_epu16",
        "full_name": "__m256i _mm256_mulhi_epu16(__m256i a, __m256i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm256_div_epu64",
        "full_name": "__m256i _mm256_div_epu64(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epu32",
        "full_name": "__m256i _mm256_div_epu32(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epi64",
        "full_name": "__m256i _mm256_div_epi64(__m256i a, __m256i b);",
        "description": "Divide packed signed 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epi32",
        "full_name": "__m256i _mm256_div_epi32(__m256i a, __m256i b);",
        "description": "Divide packed signed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_set_m128i",
        "full_name": "__m256i _mm256_set_m128i(__m128i hi, __m128i lo);",
        "description": "Set packed __m256i vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_conflict_epi32",
        "full_name": "__m256i _mm256_conflict_epi32(__m256i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm256_conflict_epi64",
        "full_name": "__m256i _mm256_conflict_epi64(__m256i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm256_mulhi_epi16",
        "full_name": "__m256i _mm256_mulhi_epi16(__m256i a, __m256i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm256_mask_conflict_epi64",
        "full_name": "__m256i _mm256_mask_conflict_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm256_mask_conflict_epi32",
        "full_name": "__m256i _mm256_mask_conflict_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_test_epi64_mask",
        "full_name": "__mmask8 _mm512_test_epi64_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_test_epi32_mask",
        "full_name": "__mmask16 _mm512_test_epi32_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_test_epi8_mask",
        "full_name": "__mmask64 _mm512_test_epi8_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_setzero_ps",
        "full_name": "__m512 _mm512_setzero_ps(void);",
        "description": "Return vector of type __m512 with all elements set to zero."
    },
    {
        "name": "_mm512_setzero_pd",
        "full_name": "__m512d _mm512_setzero_pd(void);",
        "description": "Return vector of type __m512d with all elements set to zero."
    },
    {
        "name": "_mm512_setzero_si512",
        "full_name": "__m512i _mm512_setzero_si512(void);",
        "description": "Return vector of type __m512i with all elements set to zero."
    },
    {
        "name": "_mm512_conflict_epi32",
        "full_name": "__m512i _mm512_conflict_epi32(__m512i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_conflict_epi64",
        "full_name": "__m512i _mm512_conflict_epi64(__m512i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_permutex2var_epi32",
        "full_name": "__m512i _mm512_permutex2var_epi32(__m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permutex2var_epi32",
        "full_name": "__m256i _mm256_permutex2var_epi32(__m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_conflict_epi64",
        "full_name": "__m512i _mm512_mask_conflict_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_mask_conflict_epi32",
        "full_name": "__m512i _mm512_mask_conflict_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_permutevar_epi32",
        "full_name": "__m512i _mm512_permutevar_epi32(__m512i idx, __m512i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\". Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the \"permutevar\" name. This intrinsic is identical to \"_mm512_permutexvar_epi32\", and it is recommended that you use that intrinsic name."
    },
    {
        "name": "_mm512_mask_permutevar_epi32",
        "full_name": "__m512i _mm512_mask_permutevar_epi32(__m512i src, __mmask16 k, __m512i idx, __m512i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the \"permutevar\" name. This intrinsic is identical to \"_mm512_mask_permutexvar_epi32\", and it is recommended that you use that intrinsic name."
    },
    {
        "name": "_mm256_permutevar8x32_epi32",
        "full_name": "__m256i _mm256_permutevar8x32_epi32(__m256i a, __m256i idx);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permutevar8x32_ps",
        "full_name": "__m256 _mm256_permutevar8x32_ps(__m256 a, __m256i idx);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_permutex2var_epi16",
        "full_name": "__m128i _mm_permutex2var_epi16(__m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permutex2var_epi16",
        "full_name": "__m256i _mm256_permutex2var_epi16(__m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_permutex2var_epi16",
        "full_name": "__m512i _mm512_permutex2var_epi16(__m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_epi16",
        "full_name": "__m128i _mm_mask_permutex2var_epi16(__m128i a, __mmask8 k, __m128i idx, __m128i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_permutex2var_epi16",
        "full_name": "__m256i _mm256_mask_permutex2var_epi16(__m256i a, __mmask16 k, __m256i idx, __m256i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_permutex2var_epi16",
        "full_name": "__m512i _mm512_mask_permutex2var_epi16(__m512i a, __mmask32 k, __m512i idx, __m512i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_epi16",
        "full_name": "__m128i _mm_maskz_permutex2var_epi16(__mmask8 k, __m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_epi16",
        "full_name": "__m256i _mm256_maskz_permutex2var_epi16(__mmask16 k, __m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_epi16",
        "full_name": "__m512i _mm512_maskz_permutex2var_epi16(__mmask32 k, __m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask2_permutex2var_epi16",
        "full_name": "__m128i _mm_mask2_permutex2var_epi16(__m128i a, __m128i idx, __mmask8 k, __m128i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_epi16",
        "full_name": "__m256i _mm256_mask2_permutex2var_epi16(__m256i a, __m256i idx, __mmask16 k, __m256i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_epi16",
        "full_name": "__m512i _mm512_mask2_permutex2var_epi16(__m512i a, __m512i idx, __mmask32 k, __m512i b);",
        "description": "Shuffle 16-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutex2var_epi32",
        "full_name": "__m128i _mm_permutex2var_epi32(__m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_epi32",
        "full_name": "__m128i _mm_mask_permutex2var_epi32(__m128i a, __mmask8 k, __m128i idx, __m128i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_epi32",
        "full_name": "__m128i _mm_maskz_permutex2var_epi32(__mmask8 k, __m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask2_permutex2var_epi32",
        "full_name": "__m128i _mm_mask2_permutex2var_epi32(__m128i a, __m128i idx, __mmask8 k, __m128i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_permutex2var_epi32",
        "full_name": "__m256i _mm256_mask_permutex2var_epi32(__m256i a, __mmask8 k, __m256i idx, __m256i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_permutex2var_epi32",
        "full_name": "__m512i _mm512_mask_permutex2var_epi32(__m512i a, __mmask16 k, __m512i idx, __m512i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_epi32",
        "full_name": "__m256i _mm256_maskz_permutex2var_epi32(__mmask8 k, __m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_epi32",
        "full_name": "__m512i _mm512_maskz_permutex2var_epi32(__mmask16 k, __m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_epi32",
        "full_name": "__m256i _mm256_mask2_permutex2var_epi32(__m256i a, __m256i idx, __mmask8 k, __m256i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_epi32",
        "full_name": "__m512i _mm512_mask2_permutex2var_epi32(__m512i a, __m512i idx, __mmask16 k, __m512i b);",
        "description": "Shuffle 32-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutex2var_epi64",
        "full_name": "__m256i _mm256_permutex2var_epi64(__m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex2var_epi64",
        "full_name": "__m256i _mm256_mask_permutex2var_epi64(__m256i a, __mmask8 k, __m256i idx, __m256i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_epi64",
        "full_name": "__m256i _mm256_mask2_permutex2var_epi64(__m256i a, __m256i idx, __mmask8 k, __m256i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_epi64",
        "full_name": "__m256i _mm256_maskz_permutex2var_epi64(__mmask8 k, __m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex2var_epi64",
        "full_name": "__m512i _mm512_permutex2var_epi64(__m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex2var_epi64",
        "full_name": "__m512i _mm512_mask_permutex2var_epi64(__m512i a, __mmask8 k, __m512i idx, __m512i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_epi64",
        "full_name": "__m512i _mm512_mask2_permutex2var_epi64(__m512i a, __m512i idx, __mmask8 k, __m512i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_epi64",
        "full_name": "__m512i _mm512_maskz_permutex2var_epi64(__mmask8 k, __m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutevar_ps",
        "full_name": "__m128 _mm_permutevar_ps(__m128 a, __m128i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutevar_ps",
        "full_name": "__m128 _mm_mask_permutevar_ps(__m128 src, __mmask8 k, __m128 a, __m128i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutevar_ps",
        "full_name": "__m128 _mm_maskz_permutevar_ps(__mmask8 k, __m128 a, __m128i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutevar_ps",
        "full_name": "__m256 _mm256_permutevar_ps(__m256 a, __m256i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutevar_ps",
        "full_name": "__m256 _mm256_mask_permutevar_ps(__m256 src, __mmask8 k, __m256 a, __m256i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutevar_ps",
        "full_name": "__m256 _mm256_maskz_permutevar_ps(__mmask8 k, __m256 a, __m256i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutevar_ps",
        "full_name": "__m512 _mm512_permutevar_ps(__m512 a, __m512i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutevar_ps",
        "full_name": "__m512 _mm512_mask_permutevar_ps(__m512 src, __mmask16 k, __m512 a, __m512i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutevar_ps",
        "full_name": "__m512 _mm512_maskz_permutevar_ps(__mmask16 k, __m512 a, __m512i b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permute4x64_pd",
        "full_name": "__m256d _mm256_permute4x64_pd(__m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permute2f128_pd",
        "full_name": "__m256d _mm256_permute2f128_pd(__m256d a, __m256d b, int imm8);",
        "description": "Shuffle 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permute2f128_ps",
        "full_name": "__m256 _mm256_permute2f128_ps(__m256 a, __m256 b, int imm8);",
        "description": "Shuffle 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permute2x128_si256",
        "full_name": "__m256i _mm256_permute2x128_si256(__m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128 bits (composed of integer data) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_permute_pd",
        "full_name": "__m128d _mm_permute_pd(__m128d a, int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permute_pd",
        "full_name": "__m256d _mm256_permute_pd(__m256d a, int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_permute_pd",
        "full_name": "__m512d _mm512_permute_pd(__m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permute_pd",
        "full_name": "__m128d _mm_mask_permute_pd(__m128d src, __mmask8 k, __m128d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_permute_pd",
        "full_name": "__m256d _mm256_mask_permute_pd(__m256d src, __mmask8 k, __m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_permute_pd",
        "full_name": "__m512d _mm512_mask_permute_pd(__m512d src, __mmask8 k, __m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permute_pd",
        "full_name": "__m128d _mm_maskz_permute_pd(__mmask8 k, __m128d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permute_pd",
        "full_name": "__m256d _mm256_maskz_permute_pd(__mmask8 k, __m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permute_pd",
        "full_name": "__m512d _mm512_maskz_permute_pd(__mmask8 k, __m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_epi16",
        "full_name": "__m128i _mm_mask_add_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_epi16",
        "full_name": "__m128i _mm_maskz_add_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_epi16",
        "full_name": "__m256i _mm256_mask_add_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_epi16",
        "full_name": "__m256i _mm256_maskz_add_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_epi16",
        "full_name": "__m512i _mm512_mask_add_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_epi16",
        "full_name": "__m512i _mm512_maskz_add_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_epi32",
        "full_name": "__m128i _mm_mask_add_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_epi32",
        "full_name": "__m128i _mm_maskz_add_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_epi32",
        "full_name": "__m256i _mm256_mask_add_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_epi32",
        "full_name": "__m256i _mm256_maskz_add_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_epi32",
        "full_name": "__m512i _mm512_mask_add_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_epi32",
        "full_name": "__m512i _mm512_maskz_add_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_epi64",
        "full_name": "__m128i _mm_mask_add_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_epi64",
        "full_name": "__m128i _mm_maskz_add_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_epi64",
        "full_name": "__m256i _mm256_mask_add_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_epi64",
        "full_name": "__m256i _mm256_maskz_add_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_epi64",
        "full_name": "__m512i _mm512_mask_add_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_epi64",
        "full_name": "__m512i _mm512_maskz_add_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_epi8",
        "full_name": "__m128i _mm_mask_add_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_epi8",
        "full_name": "__m128i _mm_maskz_add_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_epi8",
        "full_name": "__m256i _mm256_mask_add_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_epi8",
        "full_name": "__m256i _mm256_maskz_add_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_epi8",
        "full_name": "__m512i _mm512_mask_add_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_epi8",
        "full_name": "__m512i _mm512_maskz_add_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_pd",
        "full_name": "__m128d _mm_mask_add_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_pd",
        "full_name": "__m128d _mm_maskz_add_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_pd",
        "full_name": "__m256d _mm256_mask_add_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_pd",
        "full_name": "__m256d _mm256_maskz_add_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_pd",
        "full_name": "__m512d _mm512_mask_add_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_pd",
        "full_name": "__m512d _mm512_maskz_add_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_ps",
        "full_name": "__m128 _mm_mask_add_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_add_ps",
        "full_name": "__m128 _mm_maskz_add_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_add_ps",
        "full_name": "__m256 _mm256_mask_add_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_add_ps",
        "full_name": "__m256 _mm256_maskz_add_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_ps",
        "full_name": "__m512 _mm512_mask_add_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_add_ps",
        "full_name": "__m512 _mm512_maskz_add_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_add_sd",
        "full_name": "__m128d _mm_mask_add_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_add_sd",
        "full_name": "__m128d _mm_maskz_add_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_add_ss",
        "full_name": "__m128 _mm_mask_add_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_add_ss",
        "full_name": "__m128 _mm_maskz_add_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_and_epi32",
        "full_name": "__m128i _mm_mask_and_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_and_epi32",
        "full_name": "__m128i _mm_maskz_and_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_and_epi32",
        "full_name": "__m256i _mm256_mask_and_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_and_epi32",
        "full_name": "__m256i _mm256_maskz_and_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_and_epi32",
        "full_name": "__m512i _mm512_mask_and_epi32(__m512i src, __mmask16 k, __m512i v2, __m512i v3);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"v2\" and \"v3\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_and_epi32",
        "full_name": "__m512i _mm512_maskz_and_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_and_epi64",
        "full_name": "__m128i _mm_mask_and_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_and_epi64",
        "full_name": "__m128i _mm_maskz_and_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_and_epi64",
        "full_name": "__m256i _mm256_mask_and_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_and_epi64",
        "full_name": "__m256i _mm256_maskz_and_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_and_epi64",
        "full_name": "__m512i _mm512_mask_and_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_and_epi64",
        "full_name": "__m512i _mm512_maskz_and_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_and_pd",
        "full_name": "__m128d _mm_mask_and_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_and_pd",
        "full_name": "__m128d _mm_maskz_and_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_and_pd",
        "full_name": "__m256d _mm256_and_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_and_pd",
        "full_name": "__m256d _mm256_mask_and_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_and_pd",
        "full_name": "__m256d _mm256_maskz_and_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_and_pd",
        "full_name": "__m512d _mm512_and_pd(__m512d a, __m512d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_and_pd",
        "full_name": "__m512d _mm512_mask_and_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_and_pd",
        "full_name": "__m512d _mm512_maskz_and_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_and_ps",
        "full_name": "__m128 _mm_mask_and_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_and_ps",
        "full_name": "__m128 _mm_maskz_and_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_and_ps",
        "full_name": "__m256 _mm256_and_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_and_ps",
        "full_name": "__m256 _mm256_mask_and_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_and_ps",
        "full_name": "__m256 _mm256_maskz_and_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_and_ps",
        "full_name": "__m512 _mm512_and_ps(__m512 a, __m512 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_and_ps",
        "full_name": "__m512 _mm512_mask_and_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_and_ps",
        "full_name": "__m512 _mm512_maskz_and_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_andnot_epi32",
        "full_name": "__m128i _mm_mask_andnot_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_andnot_epi32",
        "full_name": "__m128i _mm_maskz_andnot_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_andnot_epi32",
        "full_name": "__m256i _mm256_mask_andnot_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_andnot_epi32",
        "full_name": "__m256i _mm256_maskz_andnot_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_andnot_epi32",
        "full_name": "__m512i _mm512_andnot_epi32(__m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_andnot_epi32",
        "full_name": "__m512i _mm512_mask_andnot_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_andnot_epi32",
        "full_name": "__m512i _mm512_maskz_andnot_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of packed 32-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_andnot_epi64",
        "full_name": "__m128i _mm_mask_andnot_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_andnot_epi64",
        "full_name": "__m128i _mm_maskz_andnot_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_andnot_epi64",
        "full_name": "__m256i _mm256_mask_andnot_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_andnot_epi64",
        "full_name": "__m256i _mm256_maskz_andnot_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_andnot_epi64",
        "full_name": "__m512i _mm512_andnot_epi64(__m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_andnot_epi64",
        "full_name": "__m512i _mm512_mask_andnot_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_andnot_epi64",
        "full_name": "__m512i _mm512_maskz_andnot_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of packed 64-bit integers in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_blend_epi16",
        "full_name": "__m128i _mm_mask_blend_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Blend packed 16-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_epi16",
        "full_name": "__m256i _mm256_mask_blend_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Blend packed 16-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_blend_epi16",
        "full_name": "__m512i _mm512_mask_blend_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Blend packed 16-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_blend_epi32",
        "full_name": "__m128i _mm_mask_blend_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Blend packed 32-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_epi32",
        "full_name": "__m256i _mm256_mask_blend_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Blend packed 32-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_blend_epi64",
        "full_name": "__m128i _mm_mask_blend_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Blend packed 64-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_epi64",
        "full_name": "__m256i _mm256_mask_blend_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Blend packed 64-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_blend_epi64",
        "full_name": "__m512i _mm512_mask_blend_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Blend packed 64-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_blend_epi8",
        "full_name": "__m128i _mm_mask_blend_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Blend packed 8-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_epi8",
        "full_name": "__m256i _mm256_mask_blend_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Blend packed 8-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_blend_epi8",
        "full_name": "__m512i _mm512_mask_blend_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Blend packed 8-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_blend_pd",
        "full_name": "__m128d _mm_mask_blend_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_pd",
        "full_name": "__m256d _mm256_mask_blend_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_blend_ps",
        "full_name": "__m128 _mm_mask_blend_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_blend_ps",
        "full_name": "__m256 _mm256_mask_blend_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_blendv_epi8",
        "full_name": "__m256i _mm256_blendv_epi8(__m256i a, __m256i b, __m256i mask);",
        "description": "Blend packed 8-bit integers from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_broadcastb_epi8",
        "full_name": "__m128i _mm_broadcastb_epi8(__m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcastb_epi8",
        "full_name": "__m128i _mm_mask_broadcastb_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcastb_epi8",
        "full_name": "__m128i _mm_maskz_broadcastb_epi8(__mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcastd_epi32",
        "full_name": "__m128i _mm_broadcastd_epi32(__m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcastd_epi32",
        "full_name": "__m128i _mm_mask_broadcastd_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcastd_epi32",
        "full_name": "__m128i _mm_maskz_broadcastd_epi32(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcastq_epi64",
        "full_name": "__m128i _mm_broadcastq_epi64(__m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcastq_epi64",
        "full_name": "__m128i _mm_mask_broadcastq_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcastq_epi64",
        "full_name": "__m128i _mm_maskz_broadcastq_epi64(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcastsd_pd",
        "full_name": "__m128d _mm_broadcastsd_pd(__m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_broadcastss_ps",
        "full_name": "__m128 _mm_broadcastss_ps(__m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcastss_ps",
        "full_name": "__m128 _mm_mask_broadcastss_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcastss_ps",
        "full_name": "__m128 _mm_maskz_broadcastss_ps(__mmask8 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcastw_epi16",
        "full_name": "__m128i _mm_broadcastw_epi16(__m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcastw_epi16",
        "full_name": "__m128i _mm_mask_broadcastw_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcastw_epi16",
        "full_name": "__m128i _mm_maskz_broadcastw_epi16(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epi16",
        "full_name": "__m256i _mm256_cmpeq_epi16(__m256i a, __m256i b);",
        "description": "Compare packed 16-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpeq_epi64",
        "full_name": "__m256i _mm256_cmpeq_epi64(__m256i a, __m256i b);",
        "description": "Compare packed 64-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpgt_epi16",
        "full_name": "__m256i _mm256_cmpgt_epi16(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpgt_epi64",
        "full_name": "__m256i _mm256_cmpgt_epi64(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpgt_epi8",
        "full_name": "__m256i _mm256_cmpgt_epi8(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm_conflict_epi32",
        "full_name": "__m128i _mm_conflict_epi32(__m128i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_mask_conflict_epi32",
        "full_name": "__m128i _mm_mask_conflict_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_maskz_conflict_epi32",
        "full_name": "__m128i _mm_maskz_conflict_epi32(__mmask8 k, __m128i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm256_maskz_conflict_epi32",
        "full_name": "__m256i _mm256_maskz_conflict_epi32(__mmask8 k, __m256i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_maskz_conflict_epi32",
        "full_name": "__m512i _mm512_maskz_conflict_epi32(__mmask16 k, __m512i a);",
        "description": "Test each 32-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_conflict_epi64",
        "full_name": "__m128i _mm_conflict_epi64(__m128i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit. Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_mask_conflict_epi64",
        "full_name": "__m128i _mm_mask_conflict_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_maskz_conflict_epi64",
        "full_name": "__m128i _mm_maskz_conflict_epi64(__mmask8 k, __m128i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm256_maskz_conflict_epi64",
        "full_name": "__m256i _mm256_maskz_conflict_epi64(__mmask8 k, __m256i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm512_maskz_conflict_epi64",
        "full_name": "__m512i _mm512_maskz_conflict_epi64(__mmask8 k, __m512i a);",
        "description": "Test each 64-bit element of \"a\" for equality with all other elements in \"a\" closer to the least significant bit using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in \"dst\"."
    },
    {
        "name": "_mm_mask_div_pd",
        "full_name": "__m128d _mm_mask_div_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_div_pd",
        "full_name": "__m128d _mm_maskz_div_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_div_pd",
        "full_name": "__m256d _mm256_mask_div_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_div_pd",
        "full_name": "__m256d _mm256_maskz_div_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_div_pd",
        "full_name": "__m512d _mm512_mask_div_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_div_pd",
        "full_name": "__m512d _mm512_maskz_div_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_div_ps",
        "full_name": "__m128 _mm_mask_div_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_div_ps",
        "full_name": "__m128 _mm_maskz_div_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_div_ps",
        "full_name": "__m256 _mm256_mask_div_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_div_ps",
        "full_name": "__m256 _mm256_maskz_div_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_div_ps",
        "full_name": "__m512 _mm512_mask_div_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_div_ps",
        "full_name": "__m512 _mm512_maskz_div_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_div_sd",
        "full_name": "__m128d _mm_mask_div_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_div_sd",
        "full_name": "__m128d _mm_maskz_div_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_div_ss",
        "full_name": "__m128 _mm_mask_div_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_div_ss",
        "full_name": "__m128 _mm_maskz_div_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_exp_pd",
        "full_name": "__m128d _mm_exp_pd(__m128d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_exp_pd",
        "full_name": "__m256d _mm256_exp_pd(__m256d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp_pd",
        "full_name": "__m512d _mm512_exp_pd(__m512d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_exp_pd",
        "full_name": "__m512d _mm512_mask_exp_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_exp_ps",
        "full_name": "__m128 _mm_exp_ps(__m128 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_exp_ps",
        "full_name": "__m256 _mm256_exp_ps(__m256 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp_ps",
        "full_name": "__m512 _mm512_exp_ps(__m512 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_exp_ps",
        "full_name": "__m512 _mm512_mask_exp_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_exp10_pd",
        "full_name": "__m128d _mm_exp10_pd(__m128d a);",
        "description": "Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_exp10_pd",
        "full_name": "__m256d _mm256_exp10_pd(__m256d a);",
        "description": "Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp10_pd",
        "full_name": "__m512d _mm512_exp10_pd(__m512d a);",
        "description": "Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_exp10_pd",
        "full_name": "__m512d _mm512_mask_exp10_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the exponential value of 10 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_exp10_ps",
        "full_name": "__m128 _mm_exp10_ps(__m128 a);",
        "description": "Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp10_ps",
        "full_name": "__m512 _mm512_exp10_ps(__m512 a);",
        "description": "Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_exp10_ps",
        "full_name": "__m512 _mm512_mask_exp10_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_exp2_pd",
        "full_name": "__m128d _mm_exp2_pd(__m128d a);",
        "description": "Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_exp2_pd",
        "full_name": "__m256d _mm256_exp2_pd(__m256d a);",
        "description": "Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp2_pd",
        "full_name": "__m512d _mm512_exp2_pd(__m512d a);",
        "description": "Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_exp2_pd",
        "full_name": "__m512d _mm512_mask_exp2_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the exponential value of 2 raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_epi32",
        "full_name": "__m256i _mm256_mask_expand_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_epi32",
        "full_name": "__m256i _mm256_maskz_expand_epi32(__mmask8 k, __m256i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_epi32",
        "full_name": "__m512i _mm512_mask_expand_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_epi32",
        "full_name": "__m512i _mm512_maskz_expand_epi32(__mmask16 k, __m512i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expand_epi64",
        "full_name": "__m128i _mm_mask_expand_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_epi64",
        "full_name": "__m128i _mm_maskz_expand_epi64(__mmask8 k, __m128i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_epi64",
        "full_name": "__m256i _mm256_mask_expand_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_epi64",
        "full_name": "__m256i _mm256_maskz_expand_epi64(__mmask8 k, __m256i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_epi64",
        "full_name": "__m512i _mm512_mask_expand_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_epi64",
        "full_name": "__m512i _mm512_maskz_expand_epi64(__mmask8 k, __m512i a);",
        "description": "Load contiguous active 64-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expand_pd",
        "full_name": "__m128d _mm_mask_expand_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_pd",
        "full_name": "__m128d _mm_maskz_expand_pd(__mmask8 k, __m128d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_pd",
        "full_name": "__m256d _mm256_mask_expand_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_pd",
        "full_name": "__m256d _mm256_maskz_expand_pd(__mmask8 k, __m256d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_pd",
        "full_name": "__m512d _mm512_mask_expand_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_pd",
        "full_name": "__m512d _mm512_maskz_expand_pd(__mmask8 k, __m512d a);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expand_ps",
        "full_name": "__m128 _mm_mask_expand_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_ps",
        "full_name": "__m128 _mm_maskz_expand_ps(__mmask8 k, __m128 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_ps",
        "full_name": "__m256 _mm256_mask_expand_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_ps",
        "full_name": "__m256 _mm256_maskz_expand_ps(__mmask8 k, __m256 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_ps",
        "full_name": "__m512 _mm512_mask_expand_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_ps",
        "full_name": "__m512 _mm512_maskz_expand_ps(__mmask16 k, __m512 a);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_expm1_pd",
        "full_name": "__m128d _mm_expm1_pd(__m128d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_expm1_pd",
        "full_name": "__m256d _mm256_expm1_pd(__m256d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_expm1_pd",
        "full_name": "__m512d _mm512_expm1_pd(__m512d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_expm1_pd",
        "full_name": "__m512d _mm512_mask_expm1_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed double-precision (64-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_expm1_ps",
        "full_name": "__m128 _mm_expm1_ps(__m128 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_expm1_ps",
        "full_name": "__m256 _mm256_expm1_ps(__m256 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_expm1_ps",
        "full_name": "__m512 _mm512_expm1_ps(__m512 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_expm1_ps",
        "full_name": "__m512 _mm512_mask_expm1_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", subtract one from each element, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmadd_pd",
        "full_name": "__m128d _mm_fmadd_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmadd_pd",
        "full_name": "__m128d _mm_mask_fmadd_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmadd_pd",
        "full_name": "__m128d _mm_mask3_fmadd_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmadd_pd",
        "full_name": "__m128d _mm_maskz_fmadd_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmadd_pd",
        "full_name": "__m256d _mm256_fmadd_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmadd_pd",
        "full_name": "__m256d _mm256_mask_fmadd_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmadd_pd",
        "full_name": "__m256d _mm256_mask3_fmadd_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmadd_pd",
        "full_name": "__m256d _mm256_maskz_fmadd_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmadd_pd",
        "full_name": "__m512d _mm512_fmadd_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmadd_pd",
        "full_name": "__m512d _mm512_mask_fmadd_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmadd_pd",
        "full_name": "__m512d _mm512_mask3_fmadd_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmadd_pd",
        "full_name": "__m512d _mm512_maskz_fmadd_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmadd_ps",
        "full_name": "__m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmadd_ps",
        "full_name": "__m128 _mm_mask_fmadd_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmadd_ps",
        "full_name": "__m128 _mm_mask3_fmadd_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmadd_ps",
        "full_name": "__m128 _mm_maskz_fmadd_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmadd_ps",
        "full_name": "__m256 _mm256_fmadd_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmadd_ps",
        "full_name": "__m256 _mm256_mask_fmadd_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmadd_ps",
        "full_name": "__m256 _mm256_mask3_fmadd_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmadd_ps",
        "full_name": "__m256 _mm256_maskz_fmadd_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmadd_ps",
        "full_name": "__m512 _mm512_fmadd_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmadd_ps",
        "full_name": "__m512 _mm512_mask_fmadd_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmadd_ps",
        "full_name": "__m512 _mm512_mask3_fmadd_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmadd_ps",
        "full_name": "__m512 _mm512_maskz_fmadd_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmadd_sd",
        "full_name": "__m128d _mm_fmadd_sd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_fmadd_sd",
        "full_name": "__m128d _mm_mask_fmadd_sd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask3_fmadd_sd",
        "full_name": "__m128d _mm_mask3_fmadd_sd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_fmadd_sd",
        "full_name": "__m128d _mm_maskz_fmadd_sd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_fmadd_ss",
        "full_name": "__m128 _mm_fmadd_ss(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_fmadd_ss",
        "full_name": "__m128 _mm_mask_fmadd_ss(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask3_fmadd_ss",
        "full_name": "__m128 _mm_mask3_fmadd_ss(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_fmadd_ss",
        "full_name": "__m128 _mm_maskz_fmadd_ss(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_fmaddsub_pd",
        "full_name": "__m128d _mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmaddsub_pd",
        "full_name": "__m128d _mm_mask_fmaddsub_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmaddsub_pd",
        "full_name": "__m128d _mm_mask3_fmaddsub_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmaddsub_pd",
        "full_name": "__m128d _mm_maskz_fmaddsub_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmaddsub_pd",
        "full_name": "__m256d _mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmaddsub_pd",
        "full_name": "__m256d _mm256_mask_fmaddsub_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmaddsub_pd",
        "full_name": "__m256d _mm256_mask3_fmaddsub_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmaddsub_pd",
        "full_name": "__m256d _mm256_maskz_fmaddsub_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmaddsub_pd",
        "full_name": "__m512d _mm512_fmaddsub_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmaddsub_pd",
        "full_name": "__m512d _mm512_mask_fmaddsub_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmaddsub_pd",
        "full_name": "__m512d _mm512_mask3_fmaddsub_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmaddsub_pd",
        "full_name": "__m512d _mm512_maskz_fmaddsub_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmaddsub_ps",
        "full_name": "__m128 _mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmaddsub_ps",
        "full_name": "__m128 _mm_mask_fmaddsub_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmaddsub_ps",
        "full_name": "__m128 _mm_mask3_fmaddsub_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmaddsub_ps",
        "full_name": "__m128 _mm_maskz_fmaddsub_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmaddsub_ps",
        "full_name": "__m256 _mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmaddsub_ps",
        "full_name": "__m256 _mm256_mask_fmaddsub_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmaddsub_ps",
        "full_name": "__m256 _mm256_mask3_fmaddsub_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmaddsub_ps",
        "full_name": "__m256 _mm256_maskz_fmaddsub_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmaddsub_ps",
        "full_name": "__m512 _mm512_fmaddsub_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmaddsub_ps",
        "full_name": "__m512 _mm512_mask_fmaddsub_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmaddsub_ps",
        "full_name": "__m512 _mm512_mask3_fmaddsub_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmaddsub_ps",
        "full_name": "__m512 _mm512_maskz_fmaddsub_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmsub_pd",
        "full_name": "__m128d _mm_fmsub_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmsub_pd",
        "full_name": "__m128d _mm_mask_fmsub_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmsub_pd",
        "full_name": "__m128d _mm_mask3_fmsub_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmsub_pd",
        "full_name": "__m128d _mm_maskz_fmsub_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmsub_pd",
        "full_name": "__m256d _mm256_fmsub_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmsub_pd",
        "full_name": "__m256d _mm256_mask_fmsub_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmsub_pd",
        "full_name": "__m256d _mm256_mask3_fmsub_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmsub_pd",
        "full_name": "__m256d _mm256_maskz_fmsub_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmsub_pd",
        "full_name": "__m512d _mm512_fmsub_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmsub_pd",
        "full_name": "__m512d _mm512_mask_fmsub_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmsub_pd",
        "full_name": "__m512d _mm512_mask3_fmsub_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmsub_pd",
        "full_name": "__m512d _mm512_maskz_fmsub_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmsub_ps",
        "full_name": "__m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmsub_ps",
        "full_name": "__m128 _mm_mask_fmsub_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmsub_ps",
        "full_name": "__m128 _mm_mask3_fmsub_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmsub_ps",
        "full_name": "__m128 _mm_maskz_fmsub_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmsub_ps",
        "full_name": "__m256 _mm256_fmsub_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmsub_ps",
        "full_name": "__m256 _mm256_mask_fmsub_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmsub_ps",
        "full_name": "__m256 _mm256_mask3_fmsub_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmsub_ps",
        "full_name": "__m256 _mm256_maskz_fmsub_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmsub_ps",
        "full_name": "__m512 _mm512_fmsub_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmsub_ps",
        "full_name": "__m512 _mm512_mask_fmsub_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmsub_ps",
        "full_name": "__m512 _mm512_mask3_fmsub_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmsub_ps",
        "full_name": "__m512 _mm512_maskz_fmsub_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmsub_sd",
        "full_name": "__m128d _mm_fmsub_sd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_fmsub_sd",
        "full_name": "__m128d _mm_mask_fmsub_sd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask3_fmsub_sd",
        "full_name": "__m128d _mm_mask3_fmsub_sd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_fmsub_sd",
        "full_name": "__m128d _mm_maskz_fmsub_sd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_fmsub_ss",
        "full_name": "__m128 _mm_fmsub_ss(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_fmsub_ss",
        "full_name": "__m128 _mm_mask_fmsub_ss(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask3_fmsub_ss",
        "full_name": "__m128 _mm_mask3_fmsub_ss(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_fmsub_ss",
        "full_name": "__m128 _mm_maskz_fmsub_ss(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_fmsubadd_pd",
        "full_name": "__m128d _mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmsubadd_pd",
        "full_name": "__m128d _mm_mask_fmsubadd_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmsubadd_pd",
        "full_name": "__m128d _mm_mask3_fmsubadd_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmsubadd_pd",
        "full_name": "__m128d _mm_maskz_fmsubadd_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmsubadd_pd",
        "full_name": "__m256d _mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmsubadd_pd",
        "full_name": "__m256d _mm256_mask_fmsubadd_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmsubadd_pd",
        "full_name": "__m256d _mm256_mask3_fmsubadd_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmsubadd_pd",
        "full_name": "__m256d _mm256_maskz_fmsubadd_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmsubadd_pd",
        "full_name": "__m512d _mm512_fmsubadd_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmsubadd_pd",
        "full_name": "__m512d _mm512_mask_fmsubadd_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmsubadd_pd",
        "full_name": "__m512d _mm512_mask3_fmsubadd_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmsubadd_pd",
        "full_name": "__m512d _mm512_maskz_fmsubadd_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fmsubadd_ps",
        "full_name": "__m128 _mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fmsubadd_ps",
        "full_name": "__m128 _mm_mask_fmsubadd_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fmsubadd_ps",
        "full_name": "__m128 _mm_mask3_fmsubadd_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fmsubadd_ps",
        "full_name": "__m128 _mm_maskz_fmsubadd_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fmsubadd_ps",
        "full_name": "__m256 _mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fmsubadd_ps",
        "full_name": "__m256 _mm256_mask_fmsubadd_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fmsubadd_ps",
        "full_name": "__m256 _mm256_mask3_fmsubadd_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fmsubadd_ps",
        "full_name": "__m256 _mm256_maskz_fmsubadd_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fmsubadd_ps",
        "full_name": "__m512 _mm512_fmsubadd_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fmsubadd_ps",
        "full_name": "__m512 _mm512_mask_fmsubadd_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fmsubadd_ps",
        "full_name": "__m512 _mm512_mask3_fmsubadd_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fmsubadd_ps",
        "full_name": "__m512 _mm512_maskz_fmsubadd_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fnmadd_pd",
        "full_name": "__m128d _mm_fnmadd_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fnmadd_pd",
        "full_name": "__m128d _mm_mask_fnmadd_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fnmadd_pd",
        "full_name": "__m128d _mm_mask3_fnmadd_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fnmadd_pd",
        "full_name": "__m128d _mm_maskz_fnmadd_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fnmadd_pd",
        "full_name": "__m256d _mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fnmadd_pd",
        "full_name": "__m256d _mm256_mask_fnmadd_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fnmadd_pd",
        "full_name": "__m256d _mm256_mask3_fnmadd_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fnmadd_pd",
        "full_name": "__m256d _mm256_maskz_fnmadd_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fnmadd_pd",
        "full_name": "__m512d _mm512_fnmadd_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fnmadd_pd",
        "full_name": "__m512d _mm512_mask_fnmadd_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fnmadd_pd",
        "full_name": "__m512d _mm512_mask3_fnmadd_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fnmadd_pd",
        "full_name": "__m512d _mm512_maskz_fnmadd_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fnmadd_ps",
        "full_name": "__m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fnmadd_ps",
        "full_name": "__m128 _mm_mask_fnmadd_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fnmadd_ps",
        "full_name": "__m128 _mm_mask3_fnmadd_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fnmadd_ps",
        "full_name": "__m128 _mm_maskz_fnmadd_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fnmadd_ps",
        "full_name": "__m256 _mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fnmadd_ps",
        "full_name": "__m256 _mm256_mask_fnmadd_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fnmadd_ps",
        "full_name": "__m256 _mm256_mask3_fnmadd_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fnmadd_ps",
        "full_name": "__m256 _mm256_maskz_fnmadd_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fnmadd_ps",
        "full_name": "__m512 _mm512_fnmadd_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fnmadd_ps",
        "full_name": "__m512 _mm512_mask_fnmadd_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fnmadd_ps",
        "full_name": "__m512 _mm512_mask3_fnmadd_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fnmadd_ps",
        "full_name": "__m512 _mm512_maskz_fnmadd_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fnmadd_sd",
        "full_name": "__m128d _mm_fnmadd_sd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_fnmadd_sd",
        "full_name": "__m128d _mm_mask_fnmadd_sd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask3_fnmadd_sd",
        "full_name": "__m128d _mm_mask3_fnmadd_sd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_fnmadd_sd",
        "full_name": "__m128d _mm_maskz_fnmadd_sd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_fnmadd_ss",
        "full_name": "__m128 _mm_fnmadd_ss(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_fnmadd_ss",
        "full_name": "__m128 _mm_mask_fnmadd_ss(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask3_fnmadd_ss",
        "full_name": "__m128 _mm_mask3_fnmadd_ss(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_fnmadd_ss",
        "full_name": "__m128 _mm_maskz_fnmadd_ss(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_fnmsub_pd",
        "full_name": "__m128d _mm_fnmsub_pd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fnmsub_pd",
        "full_name": "__m128d _mm_mask_fnmsub_pd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fnmsub_pd",
        "full_name": "__m128d _mm_mask3_fnmsub_pd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fnmsub_pd",
        "full_name": "__m128d _mm_maskz_fnmsub_pd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fnmsub_pd",
        "full_name": "__m256d _mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fnmsub_pd",
        "full_name": "__m256d _mm256_mask_fnmsub_pd(__m256d a, __mmask8 k, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fnmsub_pd",
        "full_name": "__m256d _mm256_mask3_fnmsub_pd(__m256d a, __m256d b, __m256d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fnmsub_pd",
        "full_name": "__m256d _mm256_maskz_fnmsub_pd(__mmask8 k, __m256d a, __m256d b, __m256d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fnmsub_pd",
        "full_name": "__m512d _mm512_fnmsub_pd(__m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fnmsub_pd",
        "full_name": "__m512d _mm512_mask_fnmsub_pd(__m512d a, __mmask8 k, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fnmsub_pd",
        "full_name": "__m512d _mm512_mask3_fnmsub_pd(__m512d a, __m512d b, __m512d c, __mmask8 k);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fnmsub_pd",
        "full_name": "__m512d _mm512_maskz_fnmsub_pd(__mmask8 k, __m512d a, __m512d b, __m512d c);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fnmsub_ps",
        "full_name": "__m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_fnmsub_ps",
        "full_name": "__m128 _mm_mask_fnmsub_ps(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask3_fnmsub_ps",
        "full_name": "__m128 _mm_mask3_fnmsub_ps(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_fnmsub_ps",
        "full_name": "__m128 _mm_maskz_fnmsub_ps(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_fnmsub_ps",
        "full_name": "__m256 _mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_fnmsub_ps",
        "full_name": "__m256 _mm256_mask_fnmsub_ps(__m256 a, __mmask8 k, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask3_fnmsub_ps",
        "full_name": "__m256 _mm256_mask3_fnmsub_ps(__m256 a, __m256 b, __m256 c, __mmask8 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_fnmsub_ps",
        "full_name": "__m256 _mm256_maskz_fnmsub_ps(__mmask8 k, __m256 a, __m256 b, __m256 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_fnmsub_ps",
        "full_name": "__m512 _mm512_fnmsub_ps(__m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_fnmsub_ps",
        "full_name": "__m512 _mm512_mask_fnmsub_ps(__m512 a, __mmask16 k, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask3_fnmsub_ps",
        "full_name": "__m512 _mm512_mask3_fnmsub_ps(__m512 a, __m512 b, __m512 c, __mmask16 k);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_fnmsub_ps",
        "full_name": "__m512 _mm512_maskz_fnmsub_ps(__mmask16 k, __m512 a, __m512 b, __m512 c);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fnmsub_sd",
        "full_name": "__m128d _mm_fnmsub_sd(__m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_fnmsub_sd",
        "full_name": "__m128d _mm_mask_fnmsub_sd(__m128d a, __mmask8 k, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask3_fnmsub_sd",
        "full_name": "__m128d _mm_mask3_fnmsub_sd(__m128d a, __m128d b, __m128d c, __mmask8 k);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_fnmsub_sd",
        "full_name": "__m128d _mm_maskz_fnmsub_sd(__mmask8 k, __m128d a, __m128d b, __m128d c);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_fnmsub_ss",
        "full_name": "__m128 _mm_fnmsub_ss(__m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_fnmsub_ss",
        "full_name": "__m128 _mm_mask_fnmsub_ss(__m128 a, __mmask8 k, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask3_fnmsub_ss",
        "full_name": "__m128 _mm_mask3_fnmsub_ss(__m128 a, __m128 b, __m128 c, __mmask8 k);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_fnmsub_ss",
        "full_name": "__m128 _mm_maskz_fnmsub_ss(__mmask8 k, __m128 a, __m128 b, __m128 c);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm256_hadd_epi16",
        "full_name": "__m256i _mm256_hadd_epi16(__m256i a, __m256i b);",
        "description": "Horizontally add adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hadd_epi32",
        "full_name": "__m256i _mm256_hadd_epi32(__m256i a, __m256i b);",
        "description": "Horizontally add adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hadd_pd",
        "full_name": "__m256d _mm256_hadd_pd(__m256d a, __m256d b);",
        "description": "Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm256_hadd_ps",
        "full_name": "__m256 _mm256_hadd_ps(__m256 a, __m256 b);",
        "description": "Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm256_hsub_epi16",
        "full_name": "__m256i _mm256_hsub_epi16(__m256i a, __m256i b);",
        "description": "Horizontally subtract adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hsub_epi32",
        "full_name": "__m256i _mm256_hsub_epi32(__m256i a, __m256i b);",
        "description": "Horizontally subtract adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_hsub_pd",
        "full_name": "__m256d _mm256_hsub_pd(__m256d a, __m256d b);",
        "description": "Horizontally subtract adjacent pairs of double-precision (64-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm256_hsub_ps",
        "full_name": "__m256 _mm256_hsub_ps(__m256 a, __m256 b);",
        "description": "Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in \"a\" and \"b\", and pack the results in \"dst\"."
    },
    {
        "name": "_mm_lzcnt_epi32",
        "full_name": "__m128i _mm_lzcnt_epi32(__m128i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_lzcnt_epi32",
        "full_name": "__m128i _mm_mask_lzcnt_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_lzcnt_epi32",
        "full_name": "__m128i _mm_maskz_lzcnt_epi32(__mmask8 k, __m128i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_lzcnt_epi32",
        "full_name": "__m256i _mm256_lzcnt_epi32(__m256i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_lzcnt_epi32",
        "full_name": "__m256i _mm256_mask_lzcnt_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_lzcnt_epi32",
        "full_name": "__m256i _mm256_maskz_lzcnt_epi32(__mmask8 k, __m256i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_lzcnt_epi32",
        "full_name": "__m512i _mm512_lzcnt_epi32(__m512i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_lzcnt_epi32",
        "full_name": "__m512i _mm512_mask_lzcnt_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_lzcnt_epi32",
        "full_name": "__m512i _mm512_maskz_lzcnt_epi32(__mmask16 k, __m512i a);",
        "description": "Count the number of leading zero bits in each packed 32-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_lzcnt_epi64",
        "full_name": "__m128i _mm_lzcnt_epi64(__m128i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_lzcnt_epi64",
        "full_name": "__m128i _mm_mask_lzcnt_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_lzcnt_epi64",
        "full_name": "__m128i _mm_maskz_lzcnt_epi64(__mmask8 k, __m128i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_lzcnt_epi64",
        "full_name": "__m256i _mm256_lzcnt_epi64(__m256i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_lzcnt_epi64",
        "full_name": "__m256i _mm256_mask_lzcnt_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_lzcnt_epi64",
        "full_name": "__m256i _mm256_maskz_lzcnt_epi64(__mmask8 k, __m256i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_lzcnt_epi64",
        "full_name": "__m512i _mm512_lzcnt_epi64(__m512i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_lzcnt_epi64",
        "full_name": "__m512i _mm512_mask_lzcnt_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_lzcnt_epi64",
        "full_name": "__m512i _mm512_maskz_lzcnt_epi64(__mmask8 k, __m512i a);",
        "description": "Count the number of leading zero bits in each packed 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_max_epi16",
        "full_name": "__m128i _mm_mask_max_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epi16",
        "full_name": "__m128i _mm_maskz_max_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_max_epi16",
        "full_name": "__m256i _mm256_mask_max_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epi16",
        "full_name": "__m256i _mm256_maskz_max_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epi16",
        "full_name": "__m256i _mm256_max_epi16(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epi16",
        "full_name": "__m512i _mm512_mask_max_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epi16",
        "full_name": "__m512i _mm512_maskz_max_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epi16",
        "full_name": "__m512i _mm512_max_epi16(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epi32",
        "full_name": "__m128i _mm_mask_max_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epi32",
        "full_name": "__m128i _mm_maskz_max_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_max_epi32",
        "full_name": "__m256i _mm256_mask_max_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epi32",
        "full_name": "__m256i _mm256_maskz_max_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_max_epi32",
        "full_name": "__m512i _mm512_mask_max_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epi32",
        "full_name": "__m512i _mm512_maskz_max_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_max_epi64",
        "full_name": "__m128i _mm_mask_max_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epi64",
        "full_name": "__m128i _mm_maskz_max_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_max_epi64",
        "full_name": "__m128i _mm_max_epi64(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_max_epi64",
        "full_name": "__m256i _mm256_mask_max_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epi64",
        "full_name": "__m256i _mm256_maskz_max_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epi64",
        "full_name": "__m256i _mm256_max_epi64(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epi64",
        "full_name": "__m512i _mm512_mask_max_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epi64",
        "full_name": "__m512i _mm512_maskz_max_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epi64",
        "full_name": "__m512i _mm512_max_epi64(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epi8",
        "full_name": "__m128i _mm_mask_max_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epi8",
        "full_name": "__m128i _mm_maskz_max_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_max_epi8",
        "full_name": "__m256i _mm256_mask_max_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epi8",
        "full_name": "__m256i _mm256_maskz_max_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epi8",
        "full_name": "__m256i _mm256_max_epi8(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epi8",
        "full_name": "__m512i _mm512_mask_max_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epi8",
        "full_name": "__m512i _mm512_maskz_max_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epi8",
        "full_name": "__m512i _mm512_max_epi8(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epu16",
        "full_name": "__m128i _mm_mask_max_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epu16",
        "full_name": "__m128i _mm_maskz_max_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_max_epu16",
        "full_name": "__m256i _mm256_mask_max_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epu16",
        "full_name": "__m256i _mm256_maskz_max_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epu16",
        "full_name": "__m256i _mm256_max_epu16(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epu16",
        "full_name": "__m512i _mm512_mask_max_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epu16",
        "full_name": "__m512i _mm512_maskz_max_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epu16",
        "full_name": "__m512i _mm512_max_epu16(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_max_epu32",
        "full_name": "__m256i _mm256_mask_max_epu32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epu32",
        "full_name": "__m256i _mm256_maskz_max_epu32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epu32",
        "full_name": "__m256i _mm256_max_epu32(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epu32",
        "full_name": "__m512i _mm512_mask_max_epu32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epu32",
        "full_name": "__m512i _mm512_maskz_max_epu32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epu32",
        "full_name": "__m512i _mm512_max_epu32(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epu64",
        "full_name": "__m128i _mm_mask_max_epu64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epu64",
        "full_name": "__m128i _mm_maskz_max_epu64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_max_epu64",
        "full_name": "__m128i _mm_max_epu64(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_max_epu64",
        "full_name": "__m256i _mm256_mask_max_epu64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epu64",
        "full_name": "__m256i _mm256_maskz_max_epu64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epu64",
        "full_name": "__m256i _mm256_max_epu64(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epu64",
        "full_name": "__m512i _mm512_mask_max_epu64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epu64",
        "full_name": "__m512i _mm512_maskz_max_epu64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epu64",
        "full_name": "__m512i _mm512_max_epu64(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epu8",
        "full_name": "__m128i _mm_mask_max_epu8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epu8",
        "full_name": "__m128i _mm_maskz_max_epu8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_max_epu8",
        "full_name": "__m256i _mm256_mask_max_epu8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_max_epu8",
        "full_name": "__m256i _mm256_maskz_max_epu8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_max_epu8",
        "full_name": "__m256i _mm256_max_epu8(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_max_epu8",
        "full_name": "__m512i _mm512_mask_max_epu8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_max_epu8",
        "full_name": "__m512i _mm512_maskz_max_epu8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_epu8",
        "full_name": "__m512i _mm512_max_epu8(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epi16",
        "full_name": "__m128i _mm_mask_min_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epi16",
        "full_name": "__m128i _mm_maskz_min_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_min_epi16",
        "full_name": "__m256i _mm256_mask_min_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epi16",
        "full_name": "__m256i _mm256_maskz_min_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epi16",
        "full_name": "__m256i _mm256_min_epi16(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epi16",
        "full_name": "__m512i _mm512_mask_min_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epi16",
        "full_name": "__m512i _mm512_maskz_min_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epi16",
        "full_name": "__m512i _mm512_min_epi16(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epi32",
        "full_name": "__m128i _mm_mask_min_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epi32",
        "full_name": "__m128i _mm_maskz_min_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_min_epi32",
        "full_name": "__m256i _mm256_mask_min_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epi32",
        "full_name": "__m256i _mm256_maskz_min_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epi32",
        "full_name": "__m256i _mm256_min_epi32(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epi32",
        "full_name": "__m512i _mm512_mask_min_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epi32",
        "full_name": "__m512i _mm512_maskz_min_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epi32",
        "full_name": "__m512i _mm512_min_epi32(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epi64",
        "full_name": "__m128i _mm_mask_min_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epi64",
        "full_name": "__m128i _mm_maskz_min_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_min_epi64",
        "full_name": "__m128i _mm_min_epi64(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_min_epi64",
        "full_name": "__m256i _mm256_mask_min_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epi64",
        "full_name": "__m256i _mm256_maskz_min_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epi64",
        "full_name": "__m256i _mm256_min_epi64(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epi64",
        "full_name": "__m512i _mm512_mask_min_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epi64",
        "full_name": "__m512i _mm512_maskz_min_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epi64",
        "full_name": "__m512i _mm512_min_epi64(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epi8",
        "full_name": "__m128i _mm_mask_min_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epi8",
        "full_name": "__m128i _mm_maskz_min_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_min_epi8",
        "full_name": "__m256i _mm256_mask_min_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epi8",
        "full_name": "__m256i _mm256_maskz_min_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epi8",
        "full_name": "__m256i _mm256_min_epi8(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epi8",
        "full_name": "__m512i _mm512_mask_min_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epi8",
        "full_name": "__m512i _mm512_maskz_min_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epi8",
        "full_name": "__m512i _mm512_min_epi8(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epu16",
        "full_name": "__m128i _mm_mask_min_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epu16",
        "full_name": "__m128i _mm_maskz_min_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_min_epu16",
        "full_name": "__m256i _mm256_mask_min_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epu16",
        "full_name": "__m256i _mm256_maskz_min_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epu16",
        "full_name": "__m256i _mm256_min_epu16(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epu16",
        "full_name": "__m512i _mm512_mask_min_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epu16",
        "full_name": "__m512i _mm512_maskz_min_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epu16",
        "full_name": "__m512i _mm512_min_epu16(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_min_epu32",
        "full_name": "__m256i _mm256_mask_min_epu32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epu32",
        "full_name": "__m256i _mm256_maskz_min_epu32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epu32",
        "full_name": "__m256i _mm256_min_epu32(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epu32",
        "full_name": "__m512i _mm512_mask_min_epu32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epu32",
        "full_name": "__m512i _mm512_maskz_min_epu32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epu32",
        "full_name": "__m512i _mm512_min_epu32(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epu64",
        "full_name": "__m128i _mm_mask_min_epu64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epu64",
        "full_name": "__m128i _mm_maskz_min_epu64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_min_epu64",
        "full_name": "__m128i _mm_min_epu64(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm256_mask_min_epu64",
        "full_name": "__m256i _mm256_mask_min_epu64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epu64",
        "full_name": "__m256i _mm256_maskz_min_epu64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epu64",
        "full_name": "__m256i _mm256_min_epu64(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epu64",
        "full_name": "__m512i _mm512_mask_min_epu64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epu64",
        "full_name": "__m512i _mm512_maskz_min_epu64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epu64",
        "full_name": "__m512i _mm512_min_epu64(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_min_epu8",
        "full_name": "__m128i _mm_mask_min_epu8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epu8",
        "full_name": "__m128i _mm_maskz_min_epu8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_min_epu8",
        "full_name": "__m256i _mm256_mask_min_epu8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_min_epu8",
        "full_name": "__m256i _mm256_maskz_min_epu8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_min_epu8",
        "full_name": "__m256i _mm256_min_epu8(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm512_mask_min_epu8",
        "full_name": "__m512i _mm512_mask_min_epu8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_min_epu8",
        "full_name": "__m512i _mm512_maskz_min_epu8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_min_epu8",
        "full_name": "__m512i _mm512_min_epu8(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\"."
    },
    {
        "name": "_mm_mask_mov_epi16",
        "full_name": "__m128i _mm_mask_mov_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_epi16",
        "full_name": "__m128i _mm_maskz_mov_epi16(__mmask8 k, __m128i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_epi16",
        "full_name": "__m256i _mm256_mask_mov_epi16(__m256i src, __mmask16 k, __m256i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_epi16",
        "full_name": "__m256i _mm256_maskz_mov_epi16(__mmask16 k, __m256i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_epi16",
        "full_name": "__m512i _mm512_mask_mov_epi16(__m512i src, __mmask32 k, __m512i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_epi16",
        "full_name": "__m512i _mm512_maskz_mov_epi16(__mmask32 k, __m512i a);",
        "description": "Move packed 16-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mov_epi32",
        "full_name": "__m128i _mm_mask_mov_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_epi32",
        "full_name": "__m128i _mm_maskz_mov_epi32(__mmask8 k, __m128i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_epi32",
        "full_name": "__m256i _mm256_mask_mov_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_epi32",
        "full_name": "__m256i _mm256_maskz_mov_epi32(__mmask8 k, __m256i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_epi32",
        "full_name": "__m512i _mm512_mask_mov_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_epi32",
        "full_name": "__m512i _mm512_maskz_mov_epi32(__mmask16 k, __m512i a);",
        "description": "Move packed 32-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mov_epi64",
        "full_name": "__m128i _mm_mask_mov_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_epi64",
        "full_name": "__m128i _mm_maskz_mov_epi64(__mmask8 k, __m128i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_epi64",
        "full_name": "__m256i _mm256_mask_mov_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_epi64",
        "full_name": "__m256i _mm256_maskz_mov_epi64(__mmask8 k, __m256i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_epi64",
        "full_name": "__m512i _mm512_mask_mov_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_epi64",
        "full_name": "__m512i _mm512_maskz_mov_epi64(__mmask8 k, __m512i a);",
        "description": "Move packed 64-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mov_epi8",
        "full_name": "__m128i _mm_mask_mov_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_epi8",
        "full_name": "__m128i _mm_maskz_mov_epi8(__mmask16 k, __m128i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_epi8",
        "full_name": "__m256i _mm256_mask_mov_epi8(__m256i src, __mmask32 k, __m256i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_epi8",
        "full_name": "__m256i _mm256_maskz_mov_epi8(__mmask32 k, __m256i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_epi8",
        "full_name": "__m512i _mm512_mask_mov_epi8(__m512i src, __mmask64 k, __m512i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_epi8",
        "full_name": "__m512i _mm512_maskz_mov_epi8(__mmask64 k, __m512i a);",
        "description": "Move packed 8-bit integers from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mov_pd",
        "full_name": "__m128d _mm_mask_mov_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_pd",
        "full_name": "__m128d _mm_maskz_mov_pd(__mmask8 k, __m128d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_pd",
        "full_name": "__m256d _mm256_mask_mov_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_pd",
        "full_name": "__m256d _mm256_maskz_mov_pd(__mmask8 k, __m256d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_pd",
        "full_name": "__m512d _mm512_mask_mov_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_pd",
        "full_name": "__m512d _mm512_maskz_mov_pd(__mmask8 k, __m512d a);",
        "description": "Move packed double-precision (64-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mov_ps",
        "full_name": "__m128 _mm_mask_mov_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mov_ps",
        "full_name": "__m128 _mm_maskz_mov_ps(__mmask8 k, __m128 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mov_ps",
        "full_name": "__m256 _mm256_mask_mov_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mov_ps",
        "full_name": "__m256 _mm256_maskz_mov_ps(__mmask8 k, __m256 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mov_ps",
        "full_name": "__m512 _mm512_mask_mov_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mov_ps",
        "full_name": "__m512 _mm512_maskz_mov_ps(__mmask16 k, __m512 a);",
        "description": "Move packed single-precision (32-bit) floating-point elements from \"a\" into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_move_sd",
        "full_name": "__m128d _mm_mask_move_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Move the lower double-precision (64-bit) floating-point element from \"b\" to the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_move_sd",
        "full_name": "__m128d _mm_maskz_move_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Move the lower double-precision (64-bit) floating-point element from \"b\" to the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_move_ss",
        "full_name": "__m128 _mm_mask_move_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Move the lower single-precision (32-bit) floating-point element from \"b\" to the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_move_ss",
        "full_name": "__m128 _mm_maskz_move_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Move the lower single-precision (32-bit) floating-point element from \"b\" to the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_movedup_pd",
        "full_name": "__m128d _mm_mask_movedup_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_movedup_pd",
        "full_name": "__m128d _mm_maskz_movedup_pd(__mmask8 k, __m128d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_movedup_pd",
        "full_name": "__m256d _mm256_mask_movedup_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_movedup_pd",
        "full_name": "__m256d _mm256_maskz_movedup_pd(__mmask8 k, __m256d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_movedup_pd",
        "full_name": "__m256d _mm256_movedup_pd(__m256d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_movedup_pd",
        "full_name": "__m512d _mm512_mask_movedup_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_movedup_pd",
        "full_name": "__m512d _mm512_maskz_movedup_pd(__mmask8 k, __m512d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_movedup_pd",
        "full_name": "__m512d _mm512_movedup_pd(__m512d a);",
        "description": "Duplicate even-indexed double-precision (64-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_movehdup_ps",
        "full_name": "__m128 _mm_mask_movehdup_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_movehdup_ps",
        "full_name": "__m128 _mm_maskz_movehdup_ps(__mmask8 k, __m128 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_movehdup_ps",
        "full_name": "__m256 _mm256_mask_movehdup_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_movehdup_ps",
        "full_name": "__m256 _mm256_maskz_movehdup_ps(__mmask8 k, __m256 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_movehdup_ps",
        "full_name": "__m256 _mm256_movehdup_ps(__m256 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_movehdup_ps",
        "full_name": "__m512 _mm512_mask_movehdup_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_movehdup_ps",
        "full_name": "__m512 _mm512_maskz_movehdup_ps(__mmask16 k, __m512 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_movehdup_ps",
        "full_name": "__m512 _mm512_movehdup_ps(__m512 a);",
        "description": "Duplicate odd-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_moveldup_ps",
        "full_name": "__m128 _mm_mask_moveldup_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_moveldup_ps",
        "full_name": "__m128 _mm_maskz_moveldup_ps(__mmask8 k, __m128 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_moveldup_ps",
        "full_name": "__m256 _mm256_mask_moveldup_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_moveldup_ps",
        "full_name": "__m256 _mm256_maskz_moveldup_ps(__mmask8 k, __m256 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_moveldup_ps",
        "full_name": "__m256 _mm256_moveldup_ps(__m256 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_moveldup_ps",
        "full_name": "__m512 _mm512_mask_moveldup_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_moveldup_ps",
        "full_name": "__m512 _mm512_maskz_moveldup_ps(__mmask16 k, __m512 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_moveldup_ps",
        "full_name": "__m512 _mm512_moveldup_ps(__m512 a);",
        "description": "Duplicate even-indexed single-precision (32-bit) floating-point elements from \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_movm_epi16",
        "full_name": "__m128i _mm_movm_epi16(__mmask8 k);",
        "description": "Set each packed 16-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm256_movm_epi16",
        "full_name": "__m256i _mm256_movm_epi16(__mmask16 k);",
        "description": "Set each packed 16-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm512_movm_epi16",
        "full_name": "__m512i _mm512_movm_epi16(__mmask32 k);",
        "description": "Set each packed 16-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm_movm_epi32",
        "full_name": "__m128i _mm_movm_epi32(__mmask8 k);",
        "description": "Set each packed 32-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm256_movm_epi32",
        "full_name": "__m256i _mm256_movm_epi32(__mmask8 k);",
        "description": "Set each packed 32-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm_movm_epi64",
        "full_name": "__m128i _mm_movm_epi64(__mmask8 k);",
        "description": "Set each packed 64-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm256_movm_epi64",
        "full_name": "__m256i _mm256_movm_epi64(__mmask8 k);",
        "description": "Set each packed 64-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm512_movm_epi64",
        "full_name": "__m512i _mm512_movm_epi64(__mmask8 k);",
        "description": "Set each packed 64-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm_movm_epi8",
        "full_name": "__m128i _mm_movm_epi8(__mmask16 k);",
        "description": "Set each packed 8-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm256_movm_epi8",
        "full_name": "__m256i _mm256_movm_epi8(__mmask32 k);",
        "description": "Set each packed 8-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm_mask_mul_pd",
        "full_name": "__m128d _mm_mask_mul_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mul_pd",
        "full_name": "__m128d _mm_maskz_mul_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mul_pd",
        "full_name": "__m256d _mm256_mask_mul_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mul_pd",
        "full_name": "__m256d _mm256_maskz_mul_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mul_pd",
        "full_name": "__m512d _mm512_mask_mul_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mul_pd",
        "full_name": "__m512d _mm512_maskz_mul_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mul_ps",
        "full_name": "__m128 _mm_mask_mul_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mul_ps",
        "full_name": "__m128 _mm_maskz_mul_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mul_ps",
        "full_name": "__m256 _mm256_mask_mul_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mul_ps",
        "full_name": "__m256 _mm256_maskz_mul_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mul_ps",
        "full_name": "__m512 _mm512_mask_mul_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mul_ps",
        "full_name": "__m512 _mm512_maskz_mul_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mul_sd",
        "full_name": "__m128d _mm_mask_mul_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_mul_sd",
        "full_name": "__m128d _mm_maskz_mul_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_mul_ss",
        "full_name": "__m128 _mm_mask_mul_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_mul_ss",
        "full_name": "__m128 _mm_maskz_mul_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_mullo_epi16",
        "full_name": "__m128i _mm_mask_mullo_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mullo_epi16",
        "full_name": "__m128i _mm_maskz_mullo_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mullo_epi16",
        "full_name": "__m256i _mm256_mask_mullo_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mullo_epi16",
        "full_name": "__m256i _mm256_maskz_mullo_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mullo_epi16",
        "full_name": "__m512i _mm512_mask_mullo_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mullo_epi16",
        "full_name": "__m512i _mm512_maskz_mullo_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mullo_epi32",
        "full_name": "__m128i _mm_mask_mullo_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mullo_epi32",
        "full_name": "__m128i _mm_maskz_mullo_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mullo_epi32",
        "full_name": "__m256i _mm256_mask_mullo_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mullo_epi32",
        "full_name": "__m256i _mm256_maskz_mullo_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mullo_epi32",
        "full_name": "__m512i _mm512_mask_mullo_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mullo_epi32",
        "full_name": "__m512i _mm512_maskz_mullo_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mullox_epi64",
        "full_name": "__m512i _mm512_mask_mullox_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Multiplies elements in packed 64-bit integer vectors \"a\" and \"b\" together, storing the lower 64 bits of the result in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_or_epi32",
        "full_name": "__m128i _mm_mask_or_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_or_epi32",
        "full_name": "__m128i _mm_maskz_or_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_or_epi32",
        "full_name": "__m128i _mm_or_epi32(__m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_or_epi32",
        "full_name": "__m256i _mm256_mask_or_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_or_epi32",
        "full_name": "__m256i _mm256_maskz_or_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_or_epi32",
        "full_name": "__m256i _mm256_or_epi32(__m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_or_epi32",
        "full_name": "__m512i _mm512_mask_or_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_or_epi32",
        "full_name": "__m512i _mm512_maskz_or_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_or_epi64",
        "full_name": "__m128i _mm_mask_or_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_or_epi64",
        "full_name": "__m128i _mm_maskz_or_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_or_epi64",
        "full_name": "__m128i _mm_or_epi64(__m128i a, __m128i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_or_epi64",
        "full_name": "__m256i _mm256_mask_or_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_or_epi64",
        "full_name": "__m256i _mm256_maskz_or_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_or_epi64",
        "full_name": "__m256i _mm256_or_epi64(__m256i a, __m256i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_or_epi64",
        "full_name": "__m512i _mm512_mask_or_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_or_epi64",
        "full_name": "__m512i _mm512_maskz_or_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_or_pd",
        "full_name": "__m128d _mm_mask_or_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_or_pd",
        "full_name": "__m128d _mm_maskz_or_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_or_pd",
        "full_name": "__m256d _mm256_mask_or_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_or_pd",
        "full_name": "__m256d _mm256_maskz_or_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_or_pd",
        "full_name": "__m512d _mm512_mask_or_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_or_pd",
        "full_name": "__m512d _mm512_maskz_or_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_or_pd",
        "full_name": "__m512d _mm512_or_pd(__m512d a, __m512d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_or_ps",
        "full_name": "__m128 _mm_mask_or_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_or_ps",
        "full_name": "__m128 _mm_maskz_or_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_or_ps",
        "full_name": "__m256 _mm256_mask_or_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_or_ps",
        "full_name": "__m256 _mm256_maskz_or_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_or_ps",
        "full_name": "__m512 _mm512_mask_or_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_or_ps",
        "full_name": "__m512 _mm512_maskz_or_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_or_ps",
        "full_name": "__m512 _mm512_or_ps(__m512 a, __m512 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutevar_pd",
        "full_name": "__m128d _mm_mask_permutevar_pd(__m128d src, __mmask8 k, __m128d a, __m128i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutevar_pd",
        "full_name": "__m128d _mm_maskz_permutevar_pd(__mmask8 k, __m128d a, __m128i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutevar_pd",
        "full_name": "__m128d _mm_permutevar_pd(__m128d a, __m128i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutevar_pd",
        "full_name": "__m256d _mm256_mask_permutevar_pd(__m256d src, __mmask8 k, __m256d a, __m256i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutevar_pd",
        "full_name": "__m256d _mm256_maskz_permutevar_pd(__mmask8 k, __m256d a, __m256i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutevar_pd",
        "full_name": "__m256d _mm256_permutevar_pd(__m256d a, __m256i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutevar_pd",
        "full_name": "__m512d _mm512_mask_permutevar_pd(__m512d src, __mmask8 k, __m512d a, __m512i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutevar_pd",
        "full_name": "__m512d _mm512_maskz_permutevar_pd(__mmask8 k, __m512d a, __m512i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutevar_pd",
        "full_name": "__m512d _mm512_permutevar_pd(__m512d a, __m512i b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_epi64",
        "full_name": "__m128i _mm_mask_permutex2var_epi64(__m128i a, __mmask8 k, __m128i idx, __m128i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask2_permutex2var_epi64",
        "full_name": "__m128i _mm_mask2_permutex2var_epi64(__m128i a, __m128i idx, __mmask8 k, __m128i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_epi64",
        "full_name": "__m128i _mm_maskz_permutex2var_epi64(__mmask8 k, __m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutex2var_epi64",
        "full_name": "__m128i _mm_permutex2var_epi64(__m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 64-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_pd",
        "full_name": "__m128d _mm_mask_permutex2var_pd(__m128d a, __mmask8 k, __m128i idx, __m128d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_pd",
        "full_name": "__m128d _mm_maskz_permutex2var_pd(__mmask8 k, __m128d a, __m128i idx, __m128d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutex2var_pd",
        "full_name": "__m128d _mm_permutex2var_pd(__m128d a, __m128i idx, __m128d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_recip_pd",
        "full_name": "__m512d _mm512_mask_recip_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Computes the reciprocal of packed double-precision (64-bit) floating-point elements in \"a\", storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_recip_pd",
        "full_name": "__m512d _mm512_recip_pd(__m512d a);",
        "description": "Computes the reciprocal of packed double-precision (64-bit) floating-point elements in \"a\", storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_recip_ps",
        "full_name": "__m512 _mm512_mask_recip_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Computes the reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_recip_ps",
        "full_name": "__m512 _mm512_recip_ps(__m512 a);",
        "description": "Computes the reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", storing the results in \"dst\"."
    },
    {
        "name": "_mm256_setr_epi32",
        "full_name": "__m256i _mm256_setr_epi32(int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0);",
        "description": "Set packed 32-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm256_setr_epi64x",
        "full_name": "__m256i _mm256_setr_epi64x(__int64 e3, __int64 e2, __int64 e1, __int64 e0);",
        "description": "Set packed 64-bit integers in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm256_setr_pd",
        "full_name": "__m256d _mm256_setr_pd(double e3, double e2, double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm512_setr_pd",
        "full_name": "__m512d _mm512_setr_pd(double e7, double e6, double e5, double e4, double e3, double e2, double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm256_setr_ps",
        "full_name": "__m256 _mm256_setr_ps(float e7, float e6, float e5, float e4, float e3, float e2, float e1, float e0);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the supplied values in reverse order."
    },
    {
        "name": "_mm256_sign_epi16",
        "full_name": "__m256i _mm256_sign_epi16(__m256i a, __m256i b);",
        "description": "Negate packed signed 16-bit integers in \"a\" when the corresponding signed 16-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm256_sign_epi32",
        "full_name": "__m256i _mm256_sign_epi32(__m256i a, __m256i b);",
        "description": "Negate packed signed 32-bit integers in \"a\" when the corresponding signed 32-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm256_sign_epi8",
        "full_name": "__m256i _mm256_sign_epi8(__m256i a, __m256i b);",
        "description": "Negate packed signed 8-bit integers in \"a\" when the corresponding signed 8-bit integer in \"b\" is negative, and store the results in \"dst\". Element in \"dst\" are zeroed out when the corresponding element in \"b\" is zero."
    },
    {
        "name": "_mm_mask_sub_epi16",
        "full_name": "__m128i _mm_mask_sub_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_epi16",
        "full_name": "__m128i _mm_maskz_sub_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_epi16",
        "full_name": "__m256i _mm256_mask_sub_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_epi16",
        "full_name": "__m256i _mm256_maskz_sub_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_epi16",
        "full_name": "__m512i _mm512_mask_sub_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_epi16",
        "full_name": "__m512i _mm512_maskz_sub_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_epi32",
        "full_name": "__m128i _mm_mask_sub_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_epi32",
        "full_name": "__m128i _mm_maskz_sub_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_epi32",
        "full_name": "__m256i _mm256_mask_sub_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_epi32",
        "full_name": "__m256i _mm256_maskz_sub_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_epi32",
        "full_name": "__m512i _mm512_mask_sub_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_epi32",
        "full_name": "__m512i _mm512_maskz_sub_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_epi64",
        "full_name": "__m128i _mm_mask_sub_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_epi64",
        "full_name": "__m128i _mm_maskz_sub_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_epi64",
        "full_name": "__m256i _mm256_mask_sub_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_epi64",
        "full_name": "__m256i _mm256_maskz_sub_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_epi64",
        "full_name": "__m512i _mm512_mask_sub_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_epi64",
        "full_name": "__m512i _mm512_maskz_sub_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_epi8",
        "full_name": "__m128i _mm_mask_sub_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_epi8",
        "full_name": "__m128i _mm_maskz_sub_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_epi8",
        "full_name": "__m256i _mm256_mask_sub_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_epi8",
        "full_name": "__m256i _mm256_maskz_sub_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_epi8",
        "full_name": "__m512i _mm512_mask_sub_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_epi8",
        "full_name": "__m512i _mm512_maskz_sub_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_pd",
        "full_name": "__m128d _mm_mask_sub_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_pd",
        "full_name": "__m128d _mm_maskz_sub_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_pd",
        "full_name": "__m256d _mm256_mask_sub_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_pd",
        "full_name": "__m256d _mm256_maskz_sub_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_pd",
        "full_name": "__m512d _mm512_mask_sub_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_pd",
        "full_name": "__m512d _mm512_maskz_sub_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_ps",
        "full_name": "__m128 _mm_mask_sub_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sub_ps",
        "full_name": "__m128 _mm_maskz_sub_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sub_ps",
        "full_name": "__m256 _mm256_mask_sub_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sub_ps",
        "full_name": "__m256 _mm256_maskz_sub_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sub_ps",
        "full_name": "__m512 _mm512_mask_sub_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sub_ps",
        "full_name": "__m512 _mm512_maskz_sub_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sub_sd",
        "full_name": "__m128d _mm_mask_sub_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_sub_sd",
        "full_name": "__m128d _mm_maskz_sub_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_sub_ss",
        "full_name": "__m128 _mm_mask_sub_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_sub_ss",
        "full_name": "__m128 _mm_maskz_sub_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_xor_epi32",
        "full_name": "__m128i _mm_mask_xor_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_xor_epi32",
        "full_name": "__m128i _mm_maskz_xor_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_xor_epi32",
        "full_name": "__m128i _mm_xor_epi32(__m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_xor_epi32",
        "full_name": "__m256i _mm256_mask_xor_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_xor_epi32",
        "full_name": "__m256i _mm256_maskz_xor_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_xor_epi32",
        "full_name": "__m256i _mm256_xor_epi32(__m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_xor_epi32",
        "full_name": "__m512i _mm512_mask_xor_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_xor_epi32",
        "full_name": "__m512i _mm512_maskz_xor_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_xor_epi32",
        "full_name": "__m512i _mm512_xor_epi32(__m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_xor_epi64",
        "full_name": "__m128i _mm_mask_xor_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_xor_epi64",
        "full_name": "__m128i _mm_maskz_xor_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_xor_epi64",
        "full_name": "__m128i _mm_xor_epi64(__m128i a, __m128i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_xor_epi64",
        "full_name": "__m256i _mm256_mask_xor_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_xor_epi64",
        "full_name": "__m256i _mm256_maskz_xor_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_xor_epi64",
        "full_name": "__m256i _mm256_xor_epi64(__m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_xor_epi64",
        "full_name": "__m512i _mm512_mask_xor_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_xor_epi64",
        "full_name": "__m512i _mm512_maskz_xor_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_xor_epi64",
        "full_name": "__m512i _mm512_xor_epi64(__m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_xor_pd",
        "full_name": "__m128d _mm_mask_xor_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_xor_pd",
        "full_name": "__m128d _mm_maskz_xor_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_xor_pd",
        "full_name": "__m256d _mm256_mask_xor_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_xor_pd",
        "full_name": "__m256d _mm256_maskz_xor_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_xor_pd",
        "full_name": "__m256d _mm256_xor_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_xor_pd",
        "full_name": "__m512d _mm512_mask_xor_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_xor_pd",
        "full_name": "__m512d _mm512_maskz_xor_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_xor_ps",
        "full_name": "__m128 _mm_mask_xor_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_xor_ps",
        "full_name": "__m128 _mm_maskz_xor_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_xor_ps",
        "full_name": "__m256 _mm256_mask_xor_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_xor_ps",
        "full_name": "__m256 _mm256_maskz_xor_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_xor_ps",
        "full_name": "__m256 _mm256_xor_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_xor_ps",
        "full_name": "__m512 _mm512_mask_xor_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_xor_ps",
        "full_name": "__m512 _mm512_maskz_xor_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_exp2_ps",
        "full_name": "__m512 _mm512_mask_exp2_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsd_f64",
        "full_name": "double _mm256_cvtsd_f64(__m256d a);",
        "description": "Copy the lower double-precision (64-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_cvtsd_f64",
        "full_name": "double _mm512_cvtsd_f64(__m512d a);",
        "description": "Copy the lower double-precision (64-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm256_cvtsi256_si32",
        "full_name": "int _mm256_cvtsi256_si32(__m256i a);",
        "description": "Copy the lower 32-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_cvtsi512_si32",
        "full_name": "int _mm512_cvtsi512_si32(__m512i a);",
        "description": "Copy the lower 32-bit integer in \"a\" to \"dst\"."
    },
    {
        "name": "_mm256_cvtss_f32",
        "full_name": "float _mm256_cvtss_f32(__m256 a);",
        "description": "Copy the lower single-precision (32-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_cvtss_f32",
        "full_name": "float _mm512_cvtss_f32(__m512 a);",
        "description": "Copy the lower single-precision (32-bit) floating-point element of \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_cvt_roundepi32_ps",
        "full_name": "__m512 _mm512_cvt_roundepi32_ps(__m512i a, int rounding);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepi32_ps",
        "full_name": "__m512 _mm512_mask_cvt_roundepi32_ps(__m512 src, __mmask16 k, __m512i a, int rounding);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepi32_ps",
        "full_name": "__m512 _mm512_maskz_cvt_roundepi32_ps(__mmask16 k, __m512i a, int rounding);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundepu32_ps",
        "full_name": "__m512 _mm512_cvt_roundepu32_ps(__m512i a, int rounding);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepu32_ps",
        "full_name": "__m512 _mm512_mask_cvt_roundepu32_ps(__m512 src, __mmask16 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepu32_ps",
        "full_name": "__m512 _mm512_maskz_cvt_roundepu32_ps(__mmask16 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundepi64_pd",
        "full_name": "__m512d _mm512_cvt_roundepi64_pd(__m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepi64_pd",
        "full_name": "__m512d _mm512_mask_cvt_roundepi64_pd(__m512d src, __mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepi64_pd",
        "full_name": "__m512d _mm512_maskz_cvt_roundepi64_pd(__mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundepu64_pd",
        "full_name": "__m512d _mm512_cvt_roundepu64_pd(__m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepu64_pd",
        "full_name": "__m512d _mm512_mask_cvt_roundepu64_pd(__m512d src, __mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepu64_pd",
        "full_name": "__m512d _mm512_maskz_cvt_roundepu64_pd(__mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundepi64_ps",
        "full_name": "__m256 _mm512_cvt_roundepi64_ps(__m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepi64_ps",
        "full_name": "__m256 _mm512_mask_cvt_roundepi64_ps(__m256 src, __mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepi64_ps",
        "full_name": "__m256 _mm512_maskz_cvt_roundepi64_ps(__mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundepu64_ps",
        "full_name": "__m256 _mm512_cvt_roundepu64_ps(__m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundepu64_ps",
        "full_name": "__m256 _mm512_mask_cvt_roundepu64_ps(__m256 src, __mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundepu64_ps",
        "full_name": "__m256 _mm512_maskz_cvt_roundepu64_ps(__mmask8 k, __m512i a, int rounding);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundi32_ss",
        "full_name": "__m128 _mm_cvt_roundi32_ss(__m128 a, int b, int rounding);",
        "description": "Convert the signed 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundi64_sd",
        "full_name": "__m128d _mm_cvt_roundi64_sd(__m128d a, __int64 b, int rounding);",
        "description": "Convert the signed 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundi64_ss",
        "full_name": "__m128 _mm_cvt_roundi64_ss(__m128 a, __int64 b, int rounding);",
        "description": "Convert the signed 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_cvtepi16_epi32",
        "full_name": "__m128i _mm_mask_cvtepi16_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi16_epi32",
        "full_name": "__m128i _mm_maskz_cvtepi16_epi32(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi16_epi32",
        "full_name": "__m256i _mm256_cvtepi16_epi32(__m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi16_epi32",
        "full_name": "__m256i _mm256_mask_cvtepi16_epi32(__m256i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi16_epi32",
        "full_name": "__m256i _mm256_maskz_cvtepi16_epi32(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi16_epi32",
        "full_name": "__m512i _mm512_cvtepi16_epi32(__m256i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi16_epi32",
        "full_name": "__m512i _mm512_mask_cvtepi16_epi32(__m512i src, __mmask16 k, __m256i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi16_epi32",
        "full_name": "__m512i _mm512_maskz_cvtepi16_epi32(__mmask16 k, __m256i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepi16_epi64",
        "full_name": "__m128i _mm_mask_cvtepi16_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi16_epi64",
        "full_name": "__m128i _mm_maskz_cvtepi16_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi16_epi64",
        "full_name": "__m256i _mm256_cvtepi16_epi64(__m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi16_epi64",
        "full_name": "__m256i _mm256_mask_cvtepi16_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi16_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepi16_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi16_epi64",
        "full_name": "__m512i _mm512_cvtepi16_epi64(__m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi16_epi64",
        "full_name": "__m512i _mm512_mask_cvtepi16_epi64(__m512i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi16_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepi16_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi16_epi8",
        "full_name": "__m128i _mm_cvtepi16_epi8(__m128i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi16_epi8",
        "full_name": "__m128i _mm_mask_cvtepi16_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi16_epi8",
        "full_name": "__m128i _mm_maskz_cvtepi16_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi16_epi8",
        "full_name": "__m128i _mm256_cvtepi16_epi8(__m256i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi16_epi8",
        "full_name": "__m128i _mm256_mask_cvtepi16_epi8(__m128i src, __mmask16 k, __m256i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi16_epi8",
        "full_name": "__m128i _mm256_maskz_cvtepi16_epi8(__mmask16 k, __m256i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi16_epi8",
        "full_name": "__m256i _mm512_cvtepi16_epi8(__m512i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi16_epi8",
        "full_name": "__m256i _mm512_mask_cvtepi16_epi8(__m256i src, __mmask32 k, __m512i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi16_epi8",
        "full_name": "__m256i _mm512_maskz_cvtepi16_epi8(__mmask32 k, __m512i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi32_epi16",
        "full_name": "__m128i _mm_cvtepi32_epi16(__m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi32_epi16",
        "full_name": "__m128i _mm_mask_cvtepi32_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi32_epi16",
        "full_name": "__m128i _mm_maskz_cvtepi32_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi32_epi16",
        "full_name": "__m128i _mm256_cvtepi32_epi16(__m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi32_epi16",
        "full_name": "__m128i _mm256_mask_cvtepi32_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi32_epi16",
        "full_name": "__m128i _mm256_maskz_cvtepi32_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi32_epi16",
        "full_name": "__m256i _mm512_cvtepi32_epi16(__m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi32_epi16",
        "full_name": "__m256i _mm512_mask_cvtepi32_epi16(__m256i src, __mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi32_epi16",
        "full_name": "__m256i _mm512_maskz_cvtepi32_epi16(__mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi32_epi64",
        "full_name": "__m128i _mm_cvtepi32_epi64(__m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi32_epi64",
        "full_name": "__m128i _mm_mask_cvtepi32_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi32_epi64",
        "full_name": "__m128i _mm_maskz_cvtepi32_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi32_epi64",
        "full_name": "__m256i _mm256_cvtepi32_epi64(__m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi32_epi64",
        "full_name": "__m256i _mm256_mask_cvtepi32_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi32_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepi32_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi32_epi64",
        "full_name": "__m512i _mm512_cvtepi32_epi64(__m256i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi32_epi64",
        "full_name": "__m512i _mm512_mask_cvtepi32_epi64(__m512i src, __mmask8 k, __m256i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi32_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepi32_epi64(__mmask8 k, __m256i a);",
        "description": "Sign extend packed 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcast_f32x2",
        "full_name": "__m256 _mm256_broadcast_f32x2(__m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_f32x2",
        "full_name": "__m256 _mm256_mask_broadcast_f32x2(__m256 src, __mmask8 k, __m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_f32x2",
        "full_name": "__m256 _mm256_maskz_broadcast_f32x2(__mmask8 k, __m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_f32x2",
        "full_name": "__m512 _mm512_broadcast_f32x2(__m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_f32x2",
        "full_name": "__m512 _mm512_mask_broadcast_f32x2(__m512 src, __mmask16 k, __m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_f32x2",
        "full_name": "__m512 _mm512_maskz_broadcast_f32x2(__mmask16 k, __m128 a);",
        "description": "Broadcast the lower 2 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_kadd_mask16",
        "full_name": "__mmask16 _kadd_mask16(__mmask16 a, __mmask16 b);",
        "description": "Add 16-bit masks in \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kadd_mask32",
        "full_name": "__mmask32 _kadd_mask32(__mmask32 a, __mmask32 b);",
        "description": "Add 32-bit masks in \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kadd_mask64",
        "full_name": "__mmask64 _kadd_mask64(__mmask64 a, __mmask64 b);",
        "description": "Add 64-bit masks in \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kadd_mask8",
        "full_name": "__mmask8 _kadd_mask8(__mmask8 a, __mmask8 b);",
        "description": "Add 8-bit masks in \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm256_broadcast_f32x4",
        "full_name": "__m256 _mm256_broadcast_f32x4(__m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_f32x4",
        "full_name": "__m256 _mm256_mask_broadcast_f32x4(__m256 src, __mmask8 k, __m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_f32x4",
        "full_name": "__m256 _mm256_maskz_broadcast_f32x4(__mmask8 k, __m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_f32x4",
        "full_name": "__m512 _mm512_broadcast_f32x4(__m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_f32x4",
        "full_name": "__m512 _mm512_mask_broadcast_f32x4(__m512 src, __mmask16 k, __m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_f32x4",
        "full_name": "__m512 _mm512_maskz_broadcast_f32x4(__mmask16 k, __m128 a);",
        "description": "Broadcast the 4 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_f32x8",
        "full_name": "__m512 _mm512_broadcast_f32x8(__m256 a);",
        "description": "Broadcast the 8 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_f32x8",
        "full_name": "__m512 _mm512_mask_broadcast_f32x8(__m512 src, __mmask16 k, __m256 a);",
        "description": "Broadcast the 8 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_f32x8",
        "full_name": "__m512 _mm512_maskz_broadcast_f32x8(__mmask16 k, __m256 a);",
        "description": "Broadcast the 8 packed single-precision (32-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcast_f64x2",
        "full_name": "__m256d _mm256_broadcast_f64x2(__m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_f64x2",
        "full_name": "__m256d _mm256_mask_broadcast_f64x2(__m256d src, __mmask8 k, __m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_f64x2",
        "full_name": "__m256d _mm256_maskz_broadcast_f64x2(__mmask8 k, __m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_f64x2",
        "full_name": "__m512d _mm512_broadcast_f64x2(__m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_f64x2",
        "full_name": "__m512d _mm512_mask_broadcast_f64x2(__m512d src, __mmask8 k, __m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_f64x2",
        "full_name": "__m512d _mm512_maskz_broadcast_f64x2(__mmask8 k, __m128d a);",
        "description": "Broadcast the 2 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_f64x4",
        "full_name": "__m512d _mm512_broadcast_f64x4(__m256d a);",
        "description": "Broadcast the 4 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_f64x4",
        "full_name": "__m512d _mm512_mask_broadcast_f64x4(__m512d src, __mmask8 k, __m256d a);",
        "description": "Broadcast the 4 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_f64x4",
        "full_name": "__m512d _mm512_maskz_broadcast_f64x4(__mmask8 k, __m256d a);",
        "description": "Broadcast the 4 packed double-precision (64-bit) floating-point elements from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_reduce_max_epi32",
        "full_name": "int _mm512_reduce_max_epi32(__m512i a);",
        "description": "Reduce the packed signed 32-bit integers in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_max_epu32",
        "full_name": "unsigned int _mm512_reduce_max_epu32(__m512i a);",
        "description": "Reduce the packed unsigned 32-bit integers in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_epi32",
        "full_name": "int _mm512_reduce_min_epi32(__m512i a);",
        "description": "Reduce the packed signed 32-bit integers in \"a\" by minimum. Returns the minimum of all elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_epu32",
        "full_name": "unsigned int _mm512_reduce_min_epu32(__m512i a);",
        "description": "Reduce the packed unsigned 32-bit integers in \"a\" by minimum. Returns the minimum of all elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_max_pd",
        "full_name": "double _mm512_reduce_max_pd(__m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm_mask_compress_epi32",
        "full_name": "__m128i _mm_mask_compress_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_epi32",
        "full_name": "__m128i _mm_maskz_compress_epi32(__mmask8 k, __m128i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_compress_epi64",
        "full_name": "__m128i _mm_mask_compress_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_epi64",
        "full_name": "__m128i _mm_maskz_compress_epi64(__mmask8 k, __m128i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_compress_ps",
        "full_name": "__m128 _mm_mask_compress_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_ps",
        "full_name": "__m128 _mm_maskz_compress_ps(__mmask8 k, __m128 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_compress_pd",
        "full_name": "__m128d _mm_mask_compress_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_pd",
        "full_name": "__m128d _mm_maskz_compress_pd(__mmask8 k, __m128d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_epi32",
        "full_name": "__m256i _mm256_mask_compress_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_epi32",
        "full_name": "__m256i _mm256_maskz_compress_epi32(__mmask8 k, __m256i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_epi64",
        "full_name": "__m256i _mm256_mask_compress_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_epi64",
        "full_name": "__m256i _mm256_maskz_compress_epi64(__mmask8 k, __m256i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_ps",
        "full_name": "__m256 _mm256_mask_compress_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_ps",
        "full_name": "__m256 _mm256_maskz_compress_ps(__mmask8 k, __m256 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_pd",
        "full_name": "__m256d _mm256_mask_compress_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_pd",
        "full_name": "__m256d _mm256_maskz_compress_pd(__mmask8 k, __m256d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_epi32",
        "full_name": "__m512i _mm512_mask_compress_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_epi32",
        "full_name": "__m512i _mm512_maskz_compress_epi32(__mmask16 k, __m512i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_epi64",
        "full_name": "__m512i _mm512_mask_compress_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_epi64",
        "full_name": "__m512i _mm512_maskz_compress_epi64(__mmask8 k, __m512i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_ps",
        "full_name": "__m512 _mm512_mask_compress_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_ps",
        "full_name": "__m512 _mm512_maskz_compress_ps(__mmask16 k, __m512 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_pd",
        "full_name": "__m512d _mm512_mask_compress_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_pd",
        "full_name": "__m512d _mm512_maskz_compress_pd(__mmask8 k, __m512d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_expand_epi32",
        "full_name": "__m128i _mm_mask_expand_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_epi32",
        "full_name": "__m128i _mm_maskz_expand_epi32(__mmask8 k, __m128i a);",
        "description": "Load contiguous active 32-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_test_epi8_mask",
        "full_name": "__mmask16 _mm_test_epi8_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm_mask_test_epi8_mask",
        "full_name": "__mmask16 _mm_mask_test_epi8_mask(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm_testn_epi8_mask",
        "full_name": "__mmask16 _mm_testn_epi8_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm_mask_testn_epi8_mask",
        "full_name": "__mmask16 _mm_mask_testn_epi8_mask(__mmask16 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm_test_epi16_mask",
        "full_name": "__mmask8 _mm_test_epi16_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm_mask_test_epi16_mask",
        "full_name": "__mmask8 _mm_mask_test_epi16_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm_testn_epi16_mask",
        "full_name": "__mmask8 _mm_testn_epi16_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm_mask_testn_epi16_mask",
        "full_name": "__mmask8 _mm_mask_testn_epi16_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm_test_epi32_mask",
        "full_name": "__mmask8 _mm_test_epi32_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm_mask_test_epi32_mask",
        "full_name": "__mmask8 _mm_mask_test_epi32_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm_testn_epi32_mask",
        "full_name": "__mmask8 _mm_testn_epi32_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm_mask_testn_epi32_mask",
        "full_name": "__mmask8 _mm_mask_testn_epi32_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm_test_epi64_mask",
        "full_name": "__mmask8 _mm_test_epi64_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm_mask_test_epi64_mask",
        "full_name": "__mmask8 _mm_mask_test_epi64_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm_testn_epi64_mask",
        "full_name": "__mmask8 _mm_testn_epi64_mask(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm_mask_testn_epi64_mask",
        "full_name": "__mmask8 _mm_mask_testn_epi64_mask(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm256_test_epi8_mask",
        "full_name": "__mmask32 _mm256_test_epi8_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_mask_test_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_test_epi8_mask(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_testn_epi8_mask",
        "full_name": "__mmask32 _mm256_testn_epi8_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm256_mask_testn_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_testn_epi8_mask(__mmask32 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm256_test_epi16_mask",
        "full_name": "__mmask16 _mm256_test_epi16_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_mask_test_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_test_epi16_mask(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_testn_epi16_mask",
        "full_name": "__mmask16 _mm256_testn_epi16_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm256_mask_testn_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_testn_epi16_mask(__mmask16 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm256_test_epi32_mask",
        "full_name": "__mmask8 _mm256_test_epi32_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_mask_test_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_test_epi32_mask(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_testn_epi32_mask",
        "full_name": "__mmask8 _mm256_testn_epi32_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm256_mask_testn_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_testn_epi32_mask(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm256_test_epi64_mask",
        "full_name": "__mmask8 _mm256_test_epi64_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_mask_test_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_test_epi64_mask(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm256_testn_epi64_mask",
        "full_name": "__mmask8 _mm256_testn_epi64_mask(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm256_mask_testn_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_testn_epi64_mask(__mmask8 k, __m256i a, __m256i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_test_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_test_epi8_mask(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_testn_epi8_mask",
        "full_name": "__mmask64 _mm512_testn_epi8_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_testn_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_testn_epi8_mask(__mmask64 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 8-bit integers in \"a\" and \"b\", producing intermediate 8-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm512_test_epi16_mask",
        "full_name": "__mmask32 _mm512_test_epi16_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_mask_test_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_test_epi16_mask(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_testn_epi16_mask",
        "full_name": "__mmask32 _mm512_testn_epi16_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_testn_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_testn_epi16_mask(__mmask32 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 16-bit integers in \"a\" and \"b\", producing intermediate 16-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_test_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_test_epi32_mask(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_testn_epi32_mask",
        "full_name": "__mmask16 _mm512_testn_epi32_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_testn_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_testn_epi32_mask(__mmask16 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", producing intermediate 32-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_test_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_test_epi64_mask(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in the returned result mask (subject to writemask \"k\") if the intermediate value is non-zero."
    },
    {
        "name": "_mm512_testn_epi64_mask",
        "full_name": "__mmask8 _mm512_testn_epi64_mask(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" if the intermediate value is zero."
    },
    {
        "name": "_mm512_mask_testn_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_testn_epi64_mask(__mmask8 k, __m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 64-bit integers in \"a\" and \"b\", producing intermediate 64-bit values, and set the corresponding bit in result mask \"k\" (subject to writemask \"k\") if the intermediate value is zero."
    },
    {
        "name": "_mm_mask_shuffle_epi8",
        "full_name": "__m128i _mm_mask_shuffle_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shuffle_epi8",
        "full_name": "__m128i _mm_maskz_shuffle_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_shuffle_ps",
        "full_name": "__m128 _mm_shuffle_ps(__m128 a, __m128 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_shuffle_ps",
        "full_name": "__m128 _mm_mask_shuffle_ps(__m128 src, __mmask8 k, __m128 a, __m128 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shuffle_ps",
        "full_name": "__m128 _mm_maskz_shuffle_ps(__mmask8 k, __m128 a, __m128 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_shuffle_pd",
        "full_name": "__m128d _mm_shuffle_pd(__m128d a, __m128d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_shuffle_pd",
        "full_name": "__m128d _mm_mask_shuffle_pd(__m128d src, __mmask8 k, __m128d a, __m128d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shuffle_pd",
        "full_name": "__m128d _mm_maskz_shuffle_pd(__mmask8 k, __m128d a, __m128d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_shuffle_epi8",
        "full_name": "__m256i _mm256_mask_shuffle_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_epi8",
        "full_name": "__m256i _mm256_maskz_shuffle_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_ps",
        "full_name": "__m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_ps",
        "full_name": "__m256 _mm256_mask_shuffle_ps(__m256 src, __mmask8 k, __m256 a, __m256 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_ps",
        "full_name": "__m256 _mm256_maskz_shuffle_ps(__mmask8 k, __m256 a, __m256 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_pd",
        "full_name": "__m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_pd",
        "full_name": "__m256d _mm256_mask_shuffle_pd(__m256d src, __mmask8 k, __m256d a, __m256d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_pd",
        "full_name": "__m256d _mm256_maskz_shuffle_pd(__mmask8 k, __m256d a, __m256d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_shuffle_epi8",
        "full_name": "__m512i _mm512_mask_shuffle_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Shuffle 8-bit integers in \"a\" within 128-bit lanes using the control in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_ps",
        "full_name": "__m512 _mm512_shuffle_ps(__m512 a, __m512 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_ps",
        "full_name": "__m512 _mm512_mask_shuffle_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_ps",
        "full_name": "__m512 _mm512_maskz_shuffle_ps(__mmask16 k, __m512 a, __m512 b, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_pd",
        "full_name": "__m512d _mm512_shuffle_pd(__m512d a, __m512d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_pd",
        "full_name": "__m512d _mm512_mask_shuffle_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_pd",
        "full_name": "__m512d _mm512_maskz_shuffle_pd(__mmask8 k, __m512d a, __m512d b, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcast_i32x2",
        "full_name": "__m128i _mm_broadcast_i32x2(__m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm_mask_broadcast_i32x2",
        "full_name": "__m128i _mm_mask_broadcast_i32x2(__m128i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_broadcast_i32x2",
        "full_name": "__m128i _mm_maskz_broadcast_i32x2(__mmask8 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcast_i32x2",
        "full_name": "__m256i _mm256_broadcast_i32x2(__m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_i32x2",
        "full_name": "__m256i _mm256_mask_broadcast_i32x2(__m256i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_i32x2",
        "full_name": "__m256i _mm256_maskz_broadcast_i32x2(__mmask8 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_i32x2",
        "full_name": "__m512i _mm512_broadcast_i32x2(__m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_i32x2",
        "full_name": "__m512i _mm512_mask_broadcast_i32x2(__m512i src, __mmask16 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_i32x2",
        "full_name": "__m512i _mm512_maskz_broadcast_i32x2(__mmask16 k, __m128i a);",
        "description": "Broadcast the lower 2 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcast_i32x4",
        "full_name": "__m256i _mm256_broadcast_i32x4(__m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_i32x4",
        "full_name": "__m256i _mm256_mask_broadcast_i32x4(__m256i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_i32x4",
        "full_name": "__m256i _mm256_maskz_broadcast_i32x4(__mmask8 k, __m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_broadcast_i32x4",
        "full_name": "__m512i _mm512_mask_broadcast_i32x4(__m512i src, __mmask16 k, __m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_i32x4",
        "full_name": "__m512i _mm512_maskz_broadcast_i32x4(__mmask16 k, __m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_i32x8",
        "full_name": "__m512i _mm512_broadcast_i32x8(__m256i a);",
        "description": "Broadcast the 8 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_i32x8",
        "full_name": "__m512i _mm512_mask_broadcast_i32x8(__m512i src, __mmask16 k, __m256i a);",
        "description": "Broadcast the 8 packed 32-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_i32x8",
        "full_name": "__m512i _mm512_maskz_broadcast_i32x8(__mmask16 k, __m256i a);",
        "description": "Broadcast the 8 packed 32-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcast_i64x2",
        "full_name": "__m256i _mm256_broadcast_i64x2(__m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcast_i64x2",
        "full_name": "__m256i _mm256_mask_broadcast_i64x2(__m256i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcast_i64x2",
        "full_name": "__m256i _mm256_maskz_broadcast_i64x2(__mmask8 k, __m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcast_i64x2",
        "full_name": "__m512i _mm512_broadcast_i64x2(__m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_i64x2",
        "full_name": "__m512i _mm512_mask_broadcast_i64x2(__m512i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_i64x2",
        "full_name": "__m512i _mm512_maskz_broadcast_i64x2(__mmask8 k, __m128i a);",
        "description": "Broadcast the 2 packed 64-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcast_i64x4",
        "full_name": "__m512i _mm512_maskz_broadcast_i64x4(__mmask8 k, __m256i a);",
        "description": "Broadcast the 4 packed 64-bit integers from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcastb_epi8",
        "full_name": "__m256i _mm256_broadcastb_epi8(__m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcastb_epi8",
        "full_name": "__m256i _mm256_mask_broadcastb_epi8(__m256i src, __mmask32 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastb_epi8",
        "full_name": "__m256i _mm256_maskz_broadcastb_epi8(__mmask32 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastb_epi8",
        "full_name": "__m512i _mm512_broadcastb_epi8(__m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastb_epi8",
        "full_name": "__m512i _mm512_mask_broadcastb_epi8(__m512i src, __mmask64 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcastb_epi8",
        "full_name": "__m512i _mm512_maskz_broadcastb_epi8(__mmask64 k, __m128i a);",
        "description": "Broadcast the low packed 8-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcastd_epi32",
        "full_name": "__m256i _mm256_broadcastd_epi32(__m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcastd_epi32",
        "full_name": "__m256i _mm256_mask_broadcastd_epi32(__m256i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastd_epi32",
        "full_name": "__m256i _mm256_maskz_broadcastd_epi32(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastd_epi32",
        "full_name": "__m512i _mm512_broadcastd_epi32(__m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastd_epi32",
        "full_name": "__m512i _mm512_mask_broadcastd_epi32(__m512i src, __mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcastd_epi32",
        "full_name": "__m512i _mm512_maskz_broadcastd_epi32(__mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 32-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_broadcastq_epi64",
        "full_name": "__m256i _mm256_mask_broadcastq_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastq_epi64",
        "full_name": "__m256i _mm256_maskz_broadcastq_epi64(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastq_epi64",
        "full_name": "__m512i _mm512_broadcastq_epi64(__m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastq_epi64",
        "full_name": "__m512i _mm512_mask_broadcastq_epi64(__m512i src, __mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcastq_epi64",
        "full_name": "__m512i _mm512_maskz_broadcastq_epi64(__mmask8 k, __m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcastsd_pd",
        "full_name": "__m256d _mm256_broadcastsd_pd(__m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcastsd_pd",
        "full_name": "__m256d _mm256_mask_broadcastsd_pd(__m256d src, __mmask8 k, __m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastsd_pd",
        "full_name": "__m256d _mm256_maskz_broadcastsd_pd(__mmask8 k, __m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastsd_pd",
        "full_name": "__m512d _mm512_broadcastsd_pd(__m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastsd_pd",
        "full_name": "__m512d _mm512_mask_broadcastsd_pd(__m512d src, __mmask8 k, __m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcastsd_pd",
        "full_name": "__m512d _mm512_maskz_broadcastsd_pd(__mmask8 k, __m128d a);",
        "description": "Broadcast the low double-precision (64-bit) floating-point element from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcastsi128_si256",
        "full_name": "__m256i _mm_broadcastsi128_si256(__m128i a);",
        "description": "Broadcast 128 bits of integer data from \"a\" to all 128-bit lanes in \"dst\"."
    },
    {
        "name": "_mm256_broadcastss_ps",
        "full_name": "__m256 _mm256_broadcastss_ps(__m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcastss_ps",
        "full_name": "__m256 _mm256_mask_broadcastss_ps(__m256 src, __mmask8 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastss_ps",
        "full_name": "__m256 _mm256_maskz_broadcastss_ps(__mmask8 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastss_ps",
        "full_name": "__m512 _mm512_broadcastss_ps(__m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastss_ps",
        "full_name": "__m512 _mm512_mask_broadcastss_ps(__m512 src, __mmask16 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_broadcastss_ps",
        "full_name": "__m512 _mm512_maskz_broadcastss_ps(__mmask16 k, __m128 a);",
        "description": "Broadcast the low single-precision (32-bit) floating-point element from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_broadcastw_epi16",
        "full_name": "__m256i _mm256_broadcastw_epi16(__m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_mask_broadcastw_epi16",
        "full_name": "__m256i _mm256_mask_broadcastw_epi16(__m256i src, __mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_broadcastw_epi16",
        "full_name": "__m256i _mm256_maskz_broadcastw_epi16(__mmask16 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_broadcastw_epi16",
        "full_name": "__m512i _mm512_broadcastw_epi16(__m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcastw_epi16",
        "full_name": "__m512i _mm512_mask_broadcastw_epi16(__m512i src, __mmask32 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_madd_epi16",
        "full_name": "__m128i _mm_mask_madd_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_madd_epi16",
        "full_name": "__m128i _mm_maskz_madd_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_madd_epi16",
        "full_name": "__m256i _mm256_madd_epi16(__m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_madd_epi16",
        "full_name": "__m256i _mm256_mask_madd_epi16(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_madd_epi16",
        "full_name": "__m256i _mm256_maskz_madd_epi16(__mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_madd_epi16",
        "full_name": "__m512i _mm512_madd_epi16(__m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_madd_epi16",
        "full_name": "__m512i _mm512_mask_madd_epi16(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_madd_epi16",
        "full_name": "__m512i _mm512_maskz_madd_epi16(__mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_reduce_add_epi32",
        "full_name": "int _mm512_mask_reduce_add_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by addition using mask \"k\". Returns the sum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_add_epi32",
        "full_name": "int _mm512_reduce_add_epi32(__m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by addition. Returns the sum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_add_epi64",
        "full_name": "__int64 _mm512_mask_reduce_add_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by addition using mask \"k\". Returns the sum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_add_epi64",
        "full_name": "__int64 _mm512_reduce_add_epi64(__m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by addition. Returns the sum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_add_pd",
        "full_name": "double _mm512_mask_reduce_add_pd(__mmask8 k, __m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by addition using mask \"k\". Returns the sum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_add_pd",
        "full_name": "double _mm512_reduce_add_pd(__m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by addition. Returns the sum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_add_ps",
        "full_name": "float _mm512_mask_reduce_add_ps(__mmask16 k, __m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by addition using mask \"k\". Returns the sum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_add_ps",
        "full_name": "float _mm512_reduce_add_ps(__m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by addition. Returns the sum of all elements in \"a\"."
    },
    {
        "name": "_mm512_maskz_broadcastw_epi16",
        "full_name": "__m512i _mm512_maskz_broadcastw_epi16(__mmask32 k, __m128i a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_adds_epi8",
        "full_name": "__m128i _mm_mask_adds_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_adds_epi8",
        "full_name": "__m128i _mm_maskz_adds_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_adds_epi8",
        "full_name": "__m256i _mm256_mask_adds_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_adds_epi8",
        "full_name": "__m256i _mm256_maskz_adds_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_adds_epi16",
        "full_name": "__m128i _mm_mask_adds_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_adds_epi16",
        "full_name": "__m128i _mm_maskz_adds_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_adds_epi16",
        "full_name": "__m256i _mm256_mask_adds_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_adds_epi16",
        "full_name": "__m256i _mm256_maskz_adds_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_adds_epi16",
        "full_name": "__m512i _mm512_mask_adds_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_adds_epi16",
        "full_name": "__m512i _mm512_maskz_adds_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_idiv_epi32",
        "full_name": "__m128i _mm_idiv_epi32(__m128i a, __m128i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_idiv_epi32",
        "full_name": "__m256i _mm256_idiv_epi32(__m256i a, __m256i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_cmpeq_epi16_mask",
        "full_name": "__mmask8 _mm_cmpeq_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epi16_mask",
        "full_name": "__mmask16 _mm256_cmpeq_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpeq_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epi16_mask",
        "full_name": "__mmask32 _mm512_cmpeq_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpeq_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epi8_mask",
        "full_name": "__mmask16 _mm_cmpeq_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmpeq_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epi8_mask",
        "full_name": "__mmask32 _mm256_cmpeq_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpeq_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epu16_mask",
        "full_name": "__mmask8 _mm_cmpeq_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epu16_mask",
        "full_name": "__mmask16 _mm256_cmpeq_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpeq_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epu16_mask",
        "full_name": "__mmask32 _mm512_cmpeq_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpeq_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epu8_mask",
        "full_name": "__mmask16 _mm_cmpeq_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmpeq_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epu8_mask",
        "full_name": "__mmask32 _mm256_cmpeq_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpeq_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epu8_mask",
        "full_name": "__mmask64 _mm512_cmpeq_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpeq_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epi16_mask",
        "full_name": "__mmask8 _mm_cmpneq_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epi16_mask",
        "full_name": "__mmask16 _mm256_cmpneq_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpneq_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_epi16_mask",
        "full_name": "__mmask32 _mm512_cmpneq_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpneq_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epi8_mask",
        "full_name": "__mmask16 _mm_cmpneq_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmpneq_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epi8_mask",
        "full_name": "__mmask32 _mm256_cmpneq_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpneq_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epu16_mask",
        "full_name": "__mmask8 _mm_cmpneq_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epu16_mask",
        "full_name": "__mmask16 _mm256_cmpneq_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpneq_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_epu16_mask",
        "full_name": "__mmask32 _mm512_cmpneq_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpneq_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epu8_mask",
        "full_name": "__mmask16 _mm_cmpneq_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmpneq_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epu8_mask",
        "full_name": "__mmask32 _mm256_cmpneq_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpneq_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_epu8_mask",
        "full_name": "__mmask64 _mm512_cmpneq_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpneq_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_pd_mask",
        "full_name": "__mmask8 _mm512_cmpeq_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpeq_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_div_round_pd",
        "full_name": "__m512d _mm512_mask_div_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_div_round_pd",
        "full_name": "__m512d _mm512_maskz_div_round_pd(__mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_div_round_ps",
        "full_name": "__m512 _mm512_mask_div_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm256_blend_epi16",
        "full_name": "__m256i _mm256_blend_epi16(__m256i a, __m256i b, const int imm8);",
        "description": "Blend packed 16-bit integers from \"a\" and \"b\" within 128-bit lanes using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_blend_epi32",
        "full_name": "__m128i _mm_blend_epi32(__m128i a, __m128i b, const int imm8);",
        "description": "Blend packed 32-bit integers from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_blend_epi32",
        "full_name": "__m256i _mm256_blend_epi32(__m256i a, __m256i b, const int imm8);",
        "description": "Blend packed 32-bit integers from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_setzero",
        "full_name": "__m512 _mm512_setzero(void);",
        "description": "Return vector of type __m512 with all elements set to zero."
    },
    {
        "name": "_mm512_setzero_epi32",
        "full_name": "__m512i _mm512_setzero_epi32();",
        "description": "Return vector of type __m512i with all elements set to zero."
    },
    {
        "name": "_mm256_setr_m128i",
        "full_name": "__m256i _mm256_setr_m128i(__m128i lo, __m128i hi);",
        "description": "Set packed __m256i vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm512_permutexvar_ps",
        "full_name": "__m512 _mm512_permutexvar_ps(__m512i idx, __m512 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\"."
    },
    {
        "name": "_mm512_mask_permutexvar_ps",
        "full_name": "__m512 _mm512_mask_permutexvar_ps(__m512 src, __mmask16 k, __m512i idx, __m512 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_ps",
        "full_name": "__m512 _mm512_maskz_permutexvar_ps(__mmask16 k, __m512i idx, __m512 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srav_epi16",
        "full_name": "__m128i _mm_srav_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srav_epi16",
        "full_name": "__m128i _mm_mask_srav_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srav_epi16",
        "full_name": "__m128i _mm_maskz_srav_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srav_epi16",
        "full_name": "__m256i _mm256_mask_srav_epi16(__m256i src, __mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srav_epi16",
        "full_name": "__m256i _mm256_maskz_srav_epi16(__mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srav_epi16",
        "full_name": "__m256i _mm256_srav_epi16(__m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srav_epi16",
        "full_name": "__m512i _mm512_mask_srav_epi16(__m512i src, __mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srav_epi16",
        "full_name": "__m512i _mm512_maskz_srav_epi16(__mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srav_epi16",
        "full_name": "__m512i _mm512_srav_epi16(__m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srav_epi32",
        "full_name": "__m128i _mm_srav_epi32(__m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srav_epi32",
        "full_name": "__m128i _mm_mask_srav_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srav_epi32",
        "full_name": "__m128i _mm_maskz_srav_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srav_epi32",
        "full_name": "__m256i _mm256_mask_srav_epi32(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srav_epi32",
        "full_name": "__m256i _mm256_maskz_srav_epi32(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_srav_epi32",
        "full_name": "__m512i _mm512_mask_srav_epi32(__m512i src, __mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srav_epi32",
        "full_name": "__m512i _mm512_maskz_srav_epi32(__mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srav_epi32",
        "full_name": "__m256i _mm256_srav_epi32(__m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srav_epi32",
        "full_name": "__m512i _mm512_srav_epi32(__m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_srav_epi64",
        "full_name": "__m128i _mm_srav_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srav_epi64",
        "full_name": "__m128i _mm_mask_srav_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srav_epi64",
        "full_name": "__m128i _mm_maskz_srav_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srav_epi64",
        "full_name": "__m256i _mm256_mask_srav_epi64(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srav_epi64",
        "full_name": "__m256i _mm256_maskz_srav_epi64(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srav_epi64",
        "full_name": "__m256i _mm256_srav_epi64(__m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srav_epi64",
        "full_name": "__m512i _mm512_mask_srav_epi64(__m512i src, __mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srav_epi64",
        "full_name": "__m512i _mm512_maskz_srav_epi64(__mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srav_epi64",
        "full_name": "__m512i _mm512_srav_epi64(__m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srl_epi16",
        "full_name": "__m128i _mm_mask_srl_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srl_epi16",
        "full_name": "__m128i _mm_maskz_srl_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srl_epi16",
        "full_name": "__m256i _mm256_mask_srl_epi16(__m256i src, __mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srl_epi16",
        "full_name": "__m256i _mm256_maskz_srl_epi16(__mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_srl_epi16",
        "full_name": "__m512i _mm512_mask_srl_epi16(__m512i src, __mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_srli_epi64",
        "full_name": "__m128i _mm_mask_srli_epi64(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srli_epi64",
        "full_name": "__m128i _mm_maskz_srli_epi64(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srli_epi64",
        "full_name": "__m256i _mm256_mask_srli_epi64(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srli_epi64",
        "full_name": "__m256i _mm256_maskz_srli_epi64(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_srli_epi32",
        "full_name": "__m128i _mm_mask_srli_epi32(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srli_epi32",
        "full_name": "__m128i _mm_maskz_srli_epi32(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srli_epi32",
        "full_name": "__m256i _mm256_mask_srli_epi32(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srli_epi32",
        "full_name": "__m256i _mm256_maskz_srli_epi32(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srli_epi16",
        "full_name": "__m128i _mm_srli_epi16(__m128i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srli_epi16",
        "full_name": "__m128i _mm_mask_srli_epi16(__m128i src, __mmask8 k, __m128i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srli_epi16",
        "full_name": "__m128i _mm_maskz_srli_epi16(__mmask8 k, __m128i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srli_epi16",
        "full_name": "__m256i _mm256_mask_srli_epi16(__m256i src, __mmask16 k, __m256i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srli_epi16",
        "full_name": "__m256i _mm256_maskz_srli_epi16(__mmask16 k, __m256i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_srl_epi32",
        "full_name": "__m128i _mm_mask_srl_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srl_epi32",
        "full_name": "__m128i _mm_maskz_srl_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srl_epi32",
        "full_name": "__m256i _mm256_srl_epi32(__m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_srl_epi32",
        "full_name": "__m256i _mm256_mask_srl_epi32(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srl_epi32",
        "full_name": "__m256i _mm256_maskz_srl_epi32(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_srl_epi64",
        "full_name": "__m128i _mm_mask_srl_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srl_epi64",
        "full_name": "__m128i _mm_maskz_srl_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srl_epi64",
        "full_name": "__m256i _mm256_mask_srl_epi64(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srl_epi64",
        "full_name": "__m256i _mm256_maskz_srl_epi64(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srlv_epi16",
        "full_name": "__m128i _mm_srlv_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srlv_epi16",
        "full_name": "__m128i _mm_mask_srlv_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srlv_epi16",
        "full_name": "__m128i _mm_maskz_srlv_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srlv_epi16",
        "full_name": "__m256i _mm256_srlv_epi16(__m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_srlv_epi16",
        "full_name": "__m256i _mm256_mask_srlv_epi16(__m256i src, __mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srlv_epi16",
        "full_name": "__m256i _mm256_maskz_srlv_epi16(__mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sllv_epi16",
        "full_name": "__m128i _mm_sllv_epi16(__m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_sllv_epi16",
        "full_name": "__m128i _mm_mask_sllv_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sllv_epi16",
        "full_name": "__m128i _mm_maskz_sllv_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_sllv_epi16",
        "full_name": "__m256i _mm256_sllv_epi16(__m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_sllv_epi16",
        "full_name": "__m256i _mm256_mask_sllv_epi16(__m256i src, __mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sllv_epi16",
        "full_name": "__m256i _mm256_maskz_sllv_epi16(__mmask16 k, __m256i a, __m256i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sll_epi32",
        "full_name": "__m128i _mm_mask_sll_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sllv_epi32",
        "full_name": "__m128i _mm_mask_sllv_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sllv_epi32",
        "full_name": "__m128i _mm_maskz_sllv_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sll_epi32",
        "full_name": "__m256i _mm256_mask_sll_epi32(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sll_epi32",
        "full_name": "__m128i _mm_maskz_sll_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sllv_epi32",
        "full_name": "__m256i _mm256_mask_sllv_epi32(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sllv_epi32",
        "full_name": "__m256i _mm256_maskz_sllv_epi32(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sllv_epi64",
        "full_name": "__m128i _mm_sllv_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_sllv_epi64",
        "full_name": "__m128i _mm_mask_sllv_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sllv_epi64",
        "full_name": "__m128i _mm_maskz_sllv_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_sllv_epi64",
        "full_name": "__m256i _mm256_sllv_epi64(__m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_sllv_epi64",
        "full_name": "__m256i _mm256_mask_sllv_epi64(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sllv_epi64",
        "full_name": "__m256i _mm256_maskz_sllv_epi64(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sll_epi16",
        "full_name": "__m128i _mm_mask_sll_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sll_epi16",
        "full_name": "__m128i _mm_maskz_sll_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sll_epi16",
        "full_name": "__m256i _mm256_mask_sll_epi16(__m256i src, __mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sll_epi16",
        "full_name": "__m256i _mm256_maskz_sll_epi16(__mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sll_epi32",
        "full_name": "__m256i _mm256_maskz_sll_epi32(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sll_epi64",
        "full_name": "__m128i _mm_mask_sll_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sll_epi64",
        "full_name": "__m128i _mm_maskz_sll_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sll_epi64",
        "full_name": "__m256i _mm256_mask_sll_epi64(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sll_epi64",
        "full_name": "__m256i _mm256_maskz_sll_epi64(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_slli_epi16",
        "full_name": "__m128i _mm_mask_slli_epi16(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_slli_epi16",
        "full_name": "__m128i _mm_maskz_slli_epi16(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_slli_epi16",
        "full_name": "__m256i _mm256_mask_slli_epi16(__m256i src, __mmask16 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_slli_epi16",
        "full_name": "__m256i _mm256_maskz_slli_epi16(__mmask16 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_slli_epi32",
        "full_name": "__m128i _mm_mask_slli_epi32(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_slli_epi32",
        "full_name": "__m256i _mm256_mask_slli_epi32(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_slli_epi32",
        "full_name": "__m256i _mm256_maskz_slli_epi32(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_slli_epi64",
        "full_name": "__m128i _mm_mask_slli_epi64(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_slli_epi64",
        "full_name": "__m256i _mm256_mask_slli_epi64(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_slli_epi64",
        "full_name": "__m256i _mm256_maskz_slli_epi64(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sra_epi16",
        "full_name": "__m128i _mm_mask_sra_epi16(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sra_epi16",
        "full_name": "__m128i _mm_maskz_sra_epi16(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sra_epi16",
        "full_name": "__m256i _mm256_mask_sra_epi16(__m256i src, __mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sra_epi16",
        "full_name": "__m256i _mm256_maskz_sra_epi16(__mmask16 k, __m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_srai_epi16",
        "full_name": "__m128i _mm_mask_srai_epi16(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srai_epi16",
        "full_name": "__m128i _mm_maskz_srai_epi16(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srai_epi16",
        "full_name": "__m256i _mm256_srai_epi16(__m256i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_srai_epi16",
        "full_name": "__m256i _mm256_mask_srai_epi16(__m256i src, __mmask16 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srai_epi16",
        "full_name": "__m256i _mm256_maskz_srai_epi16(__mmask16 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_sra_epi32",
        "full_name": "__m128i _mm_mask_sra_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sra_epi32",
        "full_name": "__m128i _mm_maskz_sra_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sra_epi32",
        "full_name": "__m256i _mm256_mask_sra_epi32(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sra_epi32",
        "full_name": "__m256i _mm256_maskz_sra_epi32(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srai_epi32",
        "full_name": "__m256i _mm256_srai_epi32(__m256i a, int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srai_epi32",
        "full_name": "__m128i _mm_mask_srai_epi32(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srai_epi32",
        "full_name": "__m128i _mm_maskz_srai_epi32(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_srai_epi32",
        "full_name": "__m256i _mm256_mask_srai_epi32(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srai_epi32",
        "full_name": "__m256i _mm256_maskz_srai_epi32(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sra_epi64",
        "full_name": "__m128i _mm_sra_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sra_epi64",
        "full_name": "__m256i _mm256_sra_epi64(__m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_sra_epi64",
        "full_name": "__m128i _mm_mask_sra_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sra_epi64",
        "full_name": "__m128i _mm_maskz_sra_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_sra_epi64",
        "full_name": "__m256i _mm256_mask_sra_epi64(__m256i src, __mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sra_epi64",
        "full_name": "__m256i _mm256_maskz_sra_epi64(__mmask8 k, __m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srai_epi64",
        "full_name": "__m128i _mm_srai_epi64(__m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srai_epi64",
        "full_name": "__m128i _mm_mask_srai_epi64(__m128i src, __mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srai_epi64",
        "full_name": "__m128i _mm_maskz_srai_epi64(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srai_epi64",
        "full_name": "__m256i _mm256_srai_epi64(__m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_srai_epi64",
        "full_name": "__m256i _mm256_mask_srai_epi64(__m256i src, __mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srai_epi64",
        "full_name": "__m256i _mm256_maskz_srai_epi64(__mmask8 k, __m256i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rol_epi32",
        "full_name": "__m128i _mm_rol_epi32(__m128i a, int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_rol_epi32",
        "full_name": "__m256i _mm256_rol_epi32(__m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_rolv_epi64",
        "full_name": "__m128i _mm_rolv_epi64(__m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_rolv_epi64",
        "full_name": "__m128i _mm_mask_rolv_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rolv_epi64",
        "full_name": "__m128i _mm_maskz_rolv_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_rolv_epi64",
        "full_name": "__m256i _mm256_rolv_epi64(__m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_rol_epi32",
        "full_name": "__m128i _mm_mask_rol_epi32(__m128i src, __mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rol_epi32",
        "full_name": "__m128i _mm_maskz_rol_epi32(__mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_rol_epi32",
        "full_name": "__m256i _mm256_mask_rol_epi32(__m256i src, __mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rol_epi32",
        "full_name": "__m256i _mm256_maskz_rol_epi32(__mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rol_epi64",
        "full_name": "__m128i _mm_rol_epi64(__m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_rol_epi64",
        "full_name": "__m128i _mm_mask_rol_epi64(__m128i src, __mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rol_epi64",
        "full_name": "__m128i _mm_maskz_rol_epi64(__mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_rol_epi64",
        "full_name": "__m256i _mm256_rol_epi64(__m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_rol_epi64",
        "full_name": "__m256i _mm256_mask_rol_epi64(__m256i src, __mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rol_epi64",
        "full_name": "__m256i _mm256_maskz_rol_epi64(__mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_rolv_epi32",
        "full_name": "__m128i _mm_mask_rolv_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rolv_epi32",
        "full_name": "__m128i _mm_maskz_rolv_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_rolv_epi32",
        "full_name": "__m256i _mm256_mask_rolv_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rolv_epi32",
        "full_name": "__m256i _mm256_maskz_rolv_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rorv_epi64",
        "full_name": "__m128i _mm_rorv_epi64(__m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_rorv_epi64",
        "full_name": "__m128i _mm_mask_rorv_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rorv_epi64",
        "full_name": "__m128i _mm_maskz_rorv_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_rorv_epi64",
        "full_name": "__m256i _mm256_mask_rorv_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rorv_epi64",
        "full_name": "__m256i _mm256_maskz_rorv_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_rolv_epi64",
        "full_name": "__m256i _mm256_mask_rolv_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rolv_epi64",
        "full_name": "__m256i _mm256_maskz_rolv_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_ror_epi32",
        "full_name": "__m128i _mm_ror_epi32(__m128i a, int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_ror_epi32",
        "full_name": "__m128i _mm_mask_ror_epi32(__m128i src, __mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_ror_epi32",
        "full_name": "__m256i _mm256_ror_epi32(__m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_ror_epi32",
        "full_name": "__m256i _mm256_mask_ror_epi32(__m256i src, __mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_ror_epi32",
        "full_name": "__m256i _mm256_maskz_ror_epi32(__mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_ror_epi64",
        "full_name": "__m128i _mm_ror_epi64(__m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_ror_epi64",
        "full_name": "__m128i _mm_mask_ror_epi64(__m128i src, __mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_ror_epi64",
        "full_name": "__m128i _mm_maskz_ror_epi64(__mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_ror_epi64",
        "full_name": "__m256i _mm256_ror_epi64(__m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_ror_epi64",
        "full_name": "__m256i _mm256_mask_ror_epi64(__m256i src, __mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_ror_epi64",
        "full_name": "__m256i _mm256_maskz_ror_epi64(__mmask8 k, __m256i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_rorv_epi32",
        "full_name": "__m128i _mm_mask_rorv_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_rorv_epi32",
        "full_name": "__m128i _mm_maskz_rorv_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_rorv_epi32",
        "full_name": "__m256i _mm256_mask_rorv_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_rorv_epi32",
        "full_name": "__m256i _mm256_maskz_rorv_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_rorv_epi64",
        "full_name": "__m256i _mm256_rorv_epi64(__m256i a, __m256i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_range_ps",
        "full_name": "__m128 _mm_range_ps(__m128 a, __m128 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_mask_range_ps",
        "full_name": "__m128 _mm_mask_range_ps(__m128 src, __mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_maskz_range_ps",
        "full_name": "__m128 _mm_maskz_range_ps(__mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_range_ps",
        "full_name": "__m256 _mm256_range_ps(__m256 a, __m256 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_mask_range_ps",
        "full_name": "__m256 _mm256_mask_range_ps(__m256 src, __mmask8 k, __m256 a, __m256 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_maskz_range_ps",
        "full_name": "__m256 _mm256_maskz_range_ps(__mmask8 k, __m256 a, __m256 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_mask_srlv_epi32",
        "full_name": "__m128i _mm_mask_srlv_epi32(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srlv_epi32",
        "full_name": "__m128i _mm_maskz_srlv_epi32(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_slli_epi32",
        "full_name": "__m128i _mm_maskz_slli_epi32(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_slli_epi64",
        "full_name": "__m128i _mm_maskz_slli_epi64(__mmask8 k, __m128i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_ror_epi32",
        "full_name": "__m128i _mm_maskz_ror_epi32(__mmask8 k, __m128i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_range_round_ss",
        "full_name": "__m128 _mm_range_round_ss(__m128 a, __m128 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm_range_round_sd",
        "full_name": "__m128d _mm_range_round_sd(__m128d a, __m128d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm_mask_range_ss",
        "full_name": "__m128 _mm_mask_range_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_maskz_range_ss",
        "full_name": "__m128 _mm_maskz_range_ss(__mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_mask_range_sd",
        "full_name": "__m128d _mm_mask_range_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_maskz_range_sd",
        "full_name": "__m128d _mm_maskz_range_sd(__mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_srl_epi16",
        "full_name": "__m256i _mm256_srl_epi16(__m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srl_epi16",
        "full_name": "__m512i _mm512_srl_epi16(__m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_srl_epi16",
        "full_name": "__m512i _mm512_maskz_srl_epi16(__mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srl_epi32",
        "full_name": "__m512i _mm512_srl_epi32(__m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srl_epi32",
        "full_name": "__m512i _mm512_mask_srl_epi32(__m512i src, __mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srl_epi32",
        "full_name": "__m512i _mm512_maskz_srl_epi32(__mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srl_epi64",
        "full_name": "__m256i _mm256_srl_epi64(__m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_srli_epi16",
        "full_name": "__m256i _mm256_srli_epi16(__m256i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sll_epi16",
        "full_name": "__m256i _mm256_sll_epi16(__m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_slli_epi16",
        "full_name": "__m256i _mm256_slli_epi16(__m256i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sra_epi16",
        "full_name": "__m256i _mm256_sra_epi16(__m256i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sra_epi32",
        "full_name": "__m256i _mm256_sra_epi32(__m256i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srl_epi64",
        "full_name": "__m512i _mm512_srl_epi64(__m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srl_epi64",
        "full_name": "__m512i _mm512_mask_srl_epi64(__m512i src, __mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srl_epi64",
        "full_name": "__m512i _mm512_maskz_srl_epi64(__mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_srli_epi64",
        "full_name": "__m512i _mm512_mask_srli_epi64(__m512i src, __mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srli_epi64",
        "full_name": "__m512i _mm512_maskz_srli_epi64(__mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_srli_epi32",
        "full_name": "__m512i _mm512_mask_srli_epi32(__m512i src, __mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srli_epi32",
        "full_name": "__m512i _mm512_maskz_srli_epi32(__mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srli_epi16",
        "full_name": "__m512i _mm512_srli_epi16(__m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srli_epi16",
        "full_name": "__m512i _mm512_mask_srli_epi16(__m512i src, __mmask32 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srli_epi16",
        "full_name": "__m512i _mm512_maskz_srli_epi16(__mmask32 k, __m512i a, int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srlv_epi16",
        "full_name": "__m512i _mm512_srlv_epi16(__m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srlv_epi16",
        "full_name": "__m512i _mm512_mask_srlv_epi16(__m512i src, __mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srlv_epi16",
        "full_name": "__m512i _mm512_maskz_srlv_epi16(__mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sll_epi16",
        "full_name": "__m512i _mm512_sll_epi16(__m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sll_epi16",
        "full_name": "__m512i _mm512_mask_sll_epi16(__m512i src, __mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sll_epi16",
        "full_name": "__m512i _mm512_maskz_sll_epi16(__mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sll_epi32",
        "full_name": "__m512i _mm512_sll_epi32(__m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sll_epi32",
        "full_name": "__m512i _mm512_mask_sll_epi32(__m512i src, __mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sll_epi32",
        "full_name": "__m512i _mm512_maskz_sll_epi32(__mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sll_epi64",
        "full_name": "__m512i _mm512_mask_sll_epi64(__m512i src, __mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sll_epi64",
        "full_name": "__m512i _mm512_maskz_sll_epi64(__mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_slli_epi32",
        "full_name": "__m512i _mm512_mask_slli_epi32(__m512i src, __mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_slli_epi32",
        "full_name": "__m512i _mm512_maskz_slli_epi32(__mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_slli_epi64",
        "full_name": "__m512i _mm512_mask_slli_epi64(__m512i src, __mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_slli_epi64",
        "full_name": "__m512i _mm512_maskz_slli_epi64(__mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_slli_epi16",
        "full_name": "__m512i _mm512_slli_epi16(__m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_slli_epi16",
        "full_name": "__m512i _mm512_mask_slli_epi16(__m512i src, __mmask32 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_slli_epi16",
        "full_name": "__m512i _mm512_maskz_slli_epi16(__mmask32 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sllv_epi16",
        "full_name": "__m512i _mm512_sllv_epi16(__m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sllv_epi16",
        "full_name": "__m512i _mm512_mask_sllv_epi16(__m512i src, __mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sllv_epi16",
        "full_name": "__m512i _mm512_maskz_sllv_epi16(__mmask32 k, __m512i a, __m512i count);",
        "description": "Shift packed 16-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_sllv_epi32",
        "full_name": "__m512i _mm512_mask_sllv_epi32(__m512i src, __mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sllv_epi32",
        "full_name": "__m512i _mm512_maskz_sllv_epi32(__mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sllv_epi64",
        "full_name": "__m512i _mm512_sllv_epi64(__m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sllv_epi64",
        "full_name": "__m512i _mm512_mask_sllv_epi64(__m512i src, __mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sllv_epi64",
        "full_name": "__m512i _mm512_maskz_sllv_epi64(__mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sra_epi16",
        "full_name": "__m512i _mm512_sra_epi16(__m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sra_epi16",
        "full_name": "__m512i _mm512_mask_sra_epi16(__m512i src, __mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sra_epi16",
        "full_name": "__m512i _mm512_maskz_sra_epi16(__mmask32 k, __m512i a, __m128i count);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sra_epi32",
        "full_name": "__m512i _mm512_sra_epi32(__m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sra_epi32",
        "full_name": "__m512i _mm512_mask_sra_epi32(__m512i src, __mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sra_epi32",
        "full_name": "__m512i _mm512_maskz_sra_epi32(__mmask16 k, __m512i a, __m128i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sra_epi64",
        "full_name": "__m512i _mm512_sra_epi64(__m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sra_epi64",
        "full_name": "__m512i _mm512_mask_sra_epi64(__m512i src, __mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sra_epi64",
        "full_name": "__m512i _mm512_maskz_sra_epi64(__mmask8 k, __m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"count\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srai_epi16",
        "full_name": "__m512i _mm512_srai_epi16(__m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srai_epi16",
        "full_name": "__m512i _mm512_mask_srai_epi16(__m512i src, __mmask32 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srai_epi16",
        "full_name": "__m512i _mm512_maskz_srai_epi16(__mmask32 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 16-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srai_epi32",
        "full_name": "__m512i _mm512_srai_epi32(__m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srai_epi32",
        "full_name": "__m512i _mm512_mask_srai_epi32(__m512i src, __mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srai_epi32",
        "full_name": "__m512i _mm512_maskz_srai_epi32(__mmask16 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 32-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srai_epi64",
        "full_name": "__m512i _mm512_srai_epi64(__m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srai_epi64",
        "full_name": "__m512i _mm512_mask_srai_epi64(__m512i src, __mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srai_epi64",
        "full_name": "__m512i _mm512_maskz_srai_epi64(__mmask8 k, __m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in sign bits, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rol_epi32",
        "full_name": "__m512i _mm512_rol_epi32(__m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rol_epi32",
        "full_name": "__m512i _mm512_mask_rol_epi32(__m512i src, __mmask16 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rol_epi32",
        "full_name": "__m512i _mm512_maskz_rol_epi32(__mmask16 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rol_epi64",
        "full_name": "__m512i _mm512_rol_epi64(__m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rol_epi64",
        "full_name": "__m512i _mm512_mask_rol_epi64(__m512i src, __mmask8 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rol_epi64",
        "full_name": "__m512i _mm512_maskz_rol_epi64(__mmask8 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_rolv_epi32",
        "full_name": "__m512i _mm512_mask_rolv_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rolv_epi32",
        "full_name": "__m512i _mm512_maskz_rolv_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rolv_epi64",
        "full_name": "__m512i _mm512_rolv_epi64(__m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rolv_epi64",
        "full_name": "__m512i _mm512_mask_rolv_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rolv_epi64",
        "full_name": "__m512i _mm512_maskz_rolv_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the left by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_ror_epi32",
        "full_name": "__m512i _mm512_ror_epi32(__m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_ror_epi32",
        "full_name": "__m512i _mm512_mask_ror_epi32(__m512i src, __mmask16 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_ror_epi32",
        "full_name": "__m512i _mm512_maskz_ror_epi32(__mmask16 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_ror_epi64",
        "full_name": "__m512i _mm512_ror_epi64(__m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_ror_epi64",
        "full_name": "__m512i _mm512_mask_ror_epi64(__m512i src, __mmask8 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_ror_epi64",
        "full_name": "__m512i _mm512_maskz_ror_epi64(__mmask8 k, __m512i a, const int imm8);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_rorv_epi32",
        "full_name": "__m512i _mm512_mask_rorv_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rorv_epi32",
        "full_name": "__m512i _mm512_maskz_rorv_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 32-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rorv_epi64",
        "full_name": "__m512i _mm512_rorv_epi64(__m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rorv_epi64",
        "full_name": "__m512i _mm512_mask_rorv_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_rorv_epi64",
        "full_name": "__m512i _mm512_maskz_rorv_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Rotate the bits in each packed 64-bit integer in \"a\" to the right by the number of bits specified in the corresponding element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_range_ps",
        "full_name": "__m512 _mm512_range_ps(__m512 a, __m512 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_mask_range_ps",
        "full_name": "__m512 _mm512_mask_range_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_maskz_range_ps",
        "full_name": "__m512 _mm512_maskz_range_ps(__mmask16 k, __m512 a, __m512 b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_range_pd",
        "full_name": "__m128d _mm_range_pd(__m128d a, __m128d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_mask_range_pd",
        "full_name": "__m128d _mm_mask_range_pd(__m128d src, __mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm_maskz_range_pd",
        "full_name": "__m128d _mm_maskz_range_pd(__mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_range_round_ps",
        "full_name": "__m512 _mm512_range_round_ps(__m512 a, __m512 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_mask_range_round_ps",
        "full_name": "__m512 _mm512_mask_range_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_maskz_range_round_ps",
        "full_name": "__m512 _mm512_maskz_range_round_ps(__mmask16 k, __m512 a, __m512 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_range_round_pd",
        "full_name": "__m512d _mm512_range_round_pd(__m512d a, __m512d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_mask_range_round_pd",
        "full_name": "__m512d _mm512_mask_range_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_maskz_range_round_pd",
        "full_name": "__m512d _mm512_maskz_range_round_pd(__mmask8 k, __m512d a, __m512d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm512_div_epi8",
        "full_name": "__m512i _mm512_div_epi8(__m512i a, __m512i b);",
        "description": "Divide packed signed 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epi16",
        "full_name": "__m512i _mm512_div_epi16(__m512i a, __m512i b);",
        "description": "Divide packed signed 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epi32",
        "full_name": "__m512i _mm512_div_epi32(__m512i a, __m512i b);",
        "description": "Divide packed signed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epi64",
        "full_name": "__m512i _mm512_div_epi64(__m512i a, __m512i b);",
        "description": "Divide packed signed 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epu8",
        "full_name": "__m512i _mm512_div_epu8(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epu16",
        "full_name": "__m512i _mm512_div_epu16(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epu32",
        "full_name": "__m512i _mm512_div_epu32(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_epu64",
        "full_name": "__m512i _mm512_div_epu64(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_div_ps",
        "full_name": "__m512 _mm512_div_ps(__m512 a, __m512 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_div_pd",
        "full_name": "__m512d _mm512_div_pd(__m512d a, __m512d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_div_round_ps",
        "full_name": "__m512 _mm512_div_round_ps(__m512 a, __m512 b, int rounding);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_div_round_pd",
        "full_name": "__m512d _mm512_div_round_pd(__m512d a, __m512d b, int rounding);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_add_epi8",
        "full_name": "__m512i _mm512_add_epi8(__m512i a, __m512i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_epi16",
        "full_name": "__m512i _mm512_add_epi16(__m512i a, __m512i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_epi32",
        "full_name": "__m512i _mm512_add_epi32(__m512i a, __m512i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_epi64",
        "full_name": "__m512i _mm512_add_epi64(__m512i a, __m512i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_adds_epi8",
        "full_name": "__m512i _mm512_adds_epi8(__m512i a, __m512i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_adds_epi16",
        "full_name": "__m512i _mm512_adds_epi16(__m512i a, __m512i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_adds_epu8",
        "full_name": "__m512i _mm512_adds_epu8(__m512i a, __m512i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_adds_epu16",
        "full_name": "__m512i _mm512_adds_epu16(__m512i a, __m512i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_ps",
        "full_name": "__m512 _mm512_add_ps(__m512 a, __m512 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_pd",
        "full_name": "__m512d _mm512_add_pd(__m512d a, __m512d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_add_round_ps",
        "full_name": "__m512 _mm512_add_round_ps(__m512 a, __m512 b, int rounding);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_add_round_pd",
        "full_name": "__m512d _mm512_add_round_pd(__m512d a, __m512d b, int rounding);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_sub_epi16",
        "full_name": "__m512i _mm512_sub_epi16(__m512i a, __m512i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_epi32",
        "full_name": "__m512i _mm512_sub_epi32(__m512i a, __m512i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_epi64",
        "full_name": "__m512i _mm512_sub_epi64(__m512i a, __m512i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_epi8",
        "full_name": "__m512i _mm512_sub_epi8(__m512i a, __m512i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_pd",
        "full_name": "__m512d _mm512_sub_pd(__m512d a, __m512d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_ps",
        "full_name": "__m512 _mm512_sub_ps(__m512 a, __m512 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sub_round_pd",
        "full_name": "__m512d _mm512_sub_round_pd(__m512d a, __m512d b, int rounding);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_sub_round_ps",
        "full_name": "__m512 _mm512_sub_round_ps(__m512 a, __m512 b, int rounding);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_subs_epi16",
        "full_name": "__m512i _mm512_subs_epi16(__m512i a, __m512i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed signed 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_subs_epi8",
        "full_name": "__m512i _mm512_subs_epi8(__m512i a, __m512i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed signed 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_subs_epu16",
        "full_name": "__m512i _mm512_subs_epu16(__m512i a, __m512i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_subs_epu8",
        "full_name": "__m512i _mm512_subs_epu8(__m512i a, __m512i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_permutexvar_epi32",
        "full_name": "__m512i _mm512_permutexvar_epi32(__m512i idx, __m512i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_permutexvar_epi64",
        "full_name": "__m512i _mm512_permutexvar_epi64(__m512i idx, __m512i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mul_epi32",
        "full_name": "__m512i _mm512_mul_epi32(__m512i a, __m512i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\"."
    },
    {
        "name": "_mm512_mul_epu32",
        "full_name": "__m512i _mm512_mul_epu32(__m512i a, __m512i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\"."
    },
    {
        "name": "_mm512_mul_pd",
        "full_name": "__m512d _mm512_mul_pd(__m512d a, __m512d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mul_ps",
        "full_name": "__m512 _mm512_mul_ps(__m512 a, __m512 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mulhi_epi16",
        "full_name": "__m512i _mm512_mulhi_epi16(__m512i a, __m512i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm512_mulhi_epu16",
        "full_name": "__m512i _mm512_mulhi_epu16(__m512i a, __m512i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm512_mullo_epi16",
        "full_name": "__m512i _mm512_mullo_epi16(__m512i a, __m512i b);",
        "description": "Multiply the packed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm512_mullo_epi32",
        "full_name": "__m512i _mm512_mullo_epi32(__m512i a, __m512i b);",
        "description": "Multiply the packed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm512_mullo_epi64",
        "full_name": "__m512i _mm512_mullo_epi64(__m512i a, __m512i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm512_mullox_epi64",
        "full_name": "__m512i _mm512_mullox_epi64(__m512i a, __m512i b);",
        "description": "Multiplies elements in packed 64-bit integer vectors \"a\" and \"b\" together, storing the lower 64 bits of the result in \"dst\"."
    },
    {
        "name": "_mm512_mulhrs_epi16",
        "full_name": "__m512i _mm512_mulhrs_epi16(__m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\"."
    },
    {
        "name": "_mm512_mul_round_pd",
        "full_name": "__m512d _mm512_mul_round_pd(__m512d a, __m512d b, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mul_round_ps",
        "full_name": "__m512 _mm512_mul_round_ps(__m512 a, __m512 b, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_slli_epi64",
        "full_name": "__m512i _mm512_slli_epi64(__m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_srli_epi64",
        "full_name": "__m512i _mm512_srli_epi64(__m512i a, unsigned int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_bslli_epi128",
        "full_name": "__m512i _mm512_bslli_epi128(__m512i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" left by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_bsrli_epi128",
        "full_name": "__m512i _mm512_bsrli_epi128(__m512i a, const int imm8);",
        "description": "Shift 128-bit lanes in \"a\" right by \"imm8\" bytes while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_unpackhi_epi8",
        "full_name": "__m512i _mm512_unpackhi_epi8(__m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_unpacklo_epi8",
        "full_name": "__m512i _mm512_unpacklo_epi8(__m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cmpeq_epi32_mask",
        "full_name": "__mmask16 _mm512_cmpeq_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmplt_epi32_mask",
        "full_name": "__mmask16 _mm512_cmplt_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpgt_epi32_mask",
        "full_name": "__mmask16 _mm512_cmpgt_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmple_epi32_mask",
        "full_name": "__mmask16 _mm512_cmple_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpneq_epi32_mask",
        "full_name": "__mmask16 _mm512_cmpneq_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmp_epi32_mask",
        "full_name": "__mmask16 _mm512_cmp_epi32_mask(__m512i a, __m512i b, const _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpeq_epi8_mask",
        "full_name": "__mmask64 _mm512_cmpeq_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpeq_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmplt_epi8_mask",
        "full_name": "__mmask64 _mm512_cmplt_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmple_epi8_mask",
        "full_name": "__mmask64 _mm512_cmple_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpneq_epi8_mask",
        "full_name": "__mmask64 _mm512_cmpneq_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmp_epi8_mask",
        "full_name": "__mmask64 _mm512_cmp_epi8_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_and_si512",
        "full_name": "__m512i _mm512_and_si512(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of 512 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_or_si512",
        "full_name": "__m512i _mm512_or_si512(__m512i a, __m512i b);",
        "description": "Compute the bitwise OR of 512 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_andnot_si512",
        "full_name": "__m512i _mm512_andnot_si512(__m512i a, __m512i b);",
        "description": "Compute the bitwise NOT of 512 bits (representing integer data) in \"a\" and then AND with \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_xor_si512",
        "full_name": "__m512i _mm512_xor_si512(__m512i a, __m512i b);",
        "description": "Compute the bitwise XOR of 512 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_and_epi32",
        "full_name": "__m512i _mm512_and_epi32(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_and_epi64",
        "full_name": "__m512i _mm512_and_epi64(__m512i a, __m512i b);",
        "description": "Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_or_epi32",
        "full_name": "__m512i _mm512_or_epi32(__m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_or_epi64",
        "full_name": "__m512i _mm512_or_epi64(__m512i a, __m512i b);",
        "description": "Compute the bitwise OR of packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_xor_ps",
        "full_name": "__m512 _mm512_xor_ps(__m512 a, __m512 b);",
        "description": "Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_xor_pd",
        "full_name": "__m512d _mm512_xor_pd(__m512d a, __m512d b);",
        "description": "Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_set1_epi32",
        "full_name": "__m512i _mm512_set1_epi32(int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_set1_epi64",
        "full_name": "__m512i _mm512_set1_epi64(__int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_set1_epi8",
        "full_name": "__m512i _mm512_set1_epi8(char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_set_pd",
        "full_name": "__m512d _mm512_set_pd(double e7, double e6, double e5, double e4, double e3, double e2, double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values."
    },
    {
        "name": "_mm512_set1_ps",
        "full_name": "__m512 _mm512_set1_ps(float a);",
        "description": "Broadcast single-precision (32-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_set1_pd",
        "full_name": "__m512d _mm512_set1_pd(double a);",
        "description": "Broadcast double-precision (64-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_movm_epi8",
        "full_name": "__m512i _mm512_movm_epi8(__mmask64 k);",
        "description": "Set each packed 8-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm512_movm_epi32",
        "full_name": "__m512i _mm512_movm_epi32(__mmask16 k);",
        "description": "Set each packed 32-bit integer in \"dst\" to all ones or all zeros based on the value of the corresponding bit in \"k\"."
    },
    {
        "name": "_mm512_extracti32x4_epi32",
        "full_name": "__m128i _mm512_extracti32x4_epi32(__m512i a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_extractf32x8_ps",
        "full_name": "__m256 _mm512_extractf32x8_ps(__m512 a, int imm8);",
        "description": "Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_extractf64x4_pd",
        "full_name": "__m256d _mm512_extractf64x4_pd(__m512d a, int imm8);",
        "description": "Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_loadu_epi8",
        "full_name": "__m512i _mm512_mask_loadu_epi8(__m512i src, __mmask64 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_maskz_loadu_epi8",
        "full_name": "__m512i _mm512_maskz_loadu_epi8(__mmask64 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_abs_epi8",
        "full_name": "__m512i _mm512_abs_epi8(__m512i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm512_broadcast_i32x4",
        "full_name": "__m512i _mm512_broadcast_i32x4(__m128i a);",
        "description": "Broadcast the 4 packed 32-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_broadcast_i64x4",
        "full_name": "__m512i _mm512_broadcast_i64x4(__m256i a);",
        "description": "Broadcast the 4 packed 64-bit integers from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_broadcast_i64x4",
        "full_name": "__m512i _mm512_mask_broadcast_i64x4(__m512i src, __mmask8 k, __m256i a);",
        "description": "Broadcast the 4 packed 64-bit integers from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_epi8",
        "full_name": "__m512i _mm512_shuffle_epi8(__m512i a, __m512i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_shuffle_epi8",
        "full_name": "__m512i _mm512_maskz_shuffle_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_blend_epi32",
        "full_name": "__m512i _mm512_mask_blend_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Blend packed 32-bit integers from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_blend_ps",
        "full_name": "__m512 _mm512_mask_blend_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_blend_pd",
        "full_name": "__m512d _mm512_mask_blend_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using control mask \"k\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_castpd_ps",
        "full_name": "__m512 _mm512_castpd_ps(__m512d a);",
        "description": "Cast vector of type __m512d to type __m512.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castpd_si512",
        "full_name": "__m512i _mm512_castpd_si512(__m512d a);",
        "description": "Cast vector of type __m512d to type __m512i.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps_pd",
        "full_name": "__m512d _mm512_castps_pd(__m512 a);",
        "description": "Cast vector of type __m512 to type __m512d.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps_si512",
        "full_name": "__m512i _mm512_castps_si512(__m512 a);",
        "description": "Cast vector of type __m512 to type __m512i.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi512_pd",
        "full_name": "__m512d _mm512_castsi512_pd(__m512i a);",
        "description": "Cast vector of type __m512i to type __m512d.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi512_ps",
        "full_name": "__m512 _mm512_castsi512_ps(__m512i a);",
        "description": "Cast vector of type __m512i to type __m512.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi128_si512",
        "full_name": "__m512i _mm512_castsi128_si512(__m128i a);",
        "description": "Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi512_si128",
        "full_name": "__m128i _mm512_castsi512_si128(__m512i a);",
        "description": "Cast vector of type __m512i to type __m128i.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi256_si512",
        "full_name": "__m512i _mm512_castsi256_si512(__m256i a);",
        "description": "Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castsi512_si256",
        "full_name": "__m256i _mm512_castsi512_si256(__m512i a);",
        "description": "Cast vector of type __m512i to type __m256i.\n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castpd128_pd512",
        "full_name": "__m512d _mm512_castpd128_pd512(__m128d a);",
        "description": "Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castpd512_pd128",
        "full_name": "__m128d _mm512_castpd512_pd128(__m512d a);",
        "description": "Cast vector of type __m512d to type __m128d. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castpd256_pd512",
        "full_name": "__m512d _mm512_castpd256_pd512(__m256d a);",
        "description": "Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castpd512_pd256",
        "full_name": "__m256d _mm512_castpd512_pd256(__m512d a);",
        "description": "Cast vector of type __m512d to type __m256d. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps128_ps512",
        "full_name": "__m512 _mm512_castps128_ps512(__m128 a);",
        "description": "Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps512_ps128",
        "full_name": "__m128 _mm512_castps512_ps128(__m512 a);",
        "description": "Cast vector of type __m512 to type __m128. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps256_ps512",
        "full_name": "__m512 _mm512_castps256_ps512(__m256 a);",
        "description": "Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_castps512_ps256",
        "full_name": "__m256 _mm512_castps512_ps256(__m512 a);",
        "description": "Cast vector of type __m512 to type __m256. \n\tThis intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_cvtepi32_ps",
        "full_name": "__m512 _mm512_cvtepi32_ps(__m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtepi32_pd",
        "full_name": "__m512d _mm512_cvtepi32_pd(__m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_insertf32x8",
        "full_name": "__m512 _mm512_insertf32x8(__m512 a, __m256 b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_insertf64x4",
        "full_name": "__m512d _mm512_insertf64x4(__m512d a, __m256d b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_inserti32x8",
        "full_name": "__m512i _mm512_inserti32x8(__m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 256 bits (composed of 8 packed 32-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_inserti64x4",
        "full_name": "__m512i _mm512_inserti64x4(__m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 256 bits (composed of 4 packed 64-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_load_epi32",
        "full_name": "__m512i _mm512_load_epi32(void const * mem_addr);",
        "description": "Load 512-bits (composed of 16 packed 32-bit integers) from memory into \"dst\". \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_load_epi64",
        "full_name": "__m512i _mm512_load_epi64(void const * mem_addr);",
        "description": "Load 512-bits (composed of 8 packed 64-bit integers) from memory into \"dst\". \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_load_pd",
        "full_name": "__m512d _mm512_load_pd(void const * mem_addr);",
        "description": "Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into \"dst\". \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_load_ps",
        "full_name": "__m512 _mm512_load_ps(void const * mem_addr);",
        "description": "Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into \"dst\". \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_store_epi32",
        "full_name": "void _mm512_store_epi32(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 16 packed 32-bit integers) from \"a\" into memory. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_store_epi64",
        "full_name": "void _mm512_store_epi64(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 8 packed 64-bit integers) from \"a\" into memory. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_store_pd",
        "full_name": "void _mm512_store_pd(void *mem_addr, __m512d a);",
        "description": "Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_store_ps",
        "full_name": "void _mm512_store_ps(void *mem_addr, __m512 a);",
        "description": "Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from \"a\" into memory. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_max_epi32",
        "full_name": "__m512i _mm512_max_epi32(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm512_packs_epi32",
        "full_name": "__m512i _mm512_packs_epi32(__m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_zextsi128_si512",
        "full_name": "__m512i _mm512_zextsi128_si512(__m128i a);",
        "description": "Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_zextsi256_si512",
        "full_name": "__m512i _mm512_zextsi256_si512(__m256i a);",
        "description": "Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_zextpd128_pd512",
        "full_name": "__m512d _mm512_zextpd128_pd512(__m128d a);",
        "description": "Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_zextpd256_pd512",
        "full_name": "__m512d _mm512_zextpd256_pd512(__m256d a);",
        "description": "Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_zextps128_ps512",
        "full_name": "__m512 _mm512_zextps128_ps512(__m128 a);",
        "description": "Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_zextps256_ps512",
        "full_name": "__m512 _mm512_zextps256_ps512(__m256 a);",
        "description": "Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm512_undefined",
        "full_name": "__m512 _mm512_undefined(void);",
        "description": "Return vector of type __m512 with undefined elements."
    },
    {
        "name": "_mm512_undefined_epi32",
        "full_name": "__m512i _mm512_undefined_epi32();",
        "description": "Return vector of type __m512i with undefined elements."
    },
    {
        "name": "_mm512_undefined_pd",
        "full_name": "__m512d _mm512_undefined_pd();",
        "description": "Return vector of type __m512d with undefined elements."
    },
    {
        "name": "_mm512_undefined_ps",
        "full_name": "__m512 _mm512_undefined_ps();",
        "description": "Return vector of type __m512 with undefined elements."
    },
    {
        "name": "_mm256_set_ps",
        "full_name": "__m256 _mm256_set_ps(float e7, float e6, float e5, float e4, float e3, float e2, float e1, float e0);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_set_pd",
        "full_name": "__m256d _mm256_set_pd(double e3, double e2, double e1, double e0);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_set1_epi8",
        "full_name": "__m256i _mm256_set1_epi8(int8_t a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\". This intrinsic may generate the \"vpbroadcastb\"."
    },
    {
        "name": "_mm256_set1_epi32",
        "full_name": "__m256i _mm256_set1_epi32(int32_t a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\". This intrinsic may generate the \"vpbroadcastd\"."
    },
    {
        "name": "_mm256_set1_epi64x",
        "full_name": "__m256i _mm256_set1_epi64x(int64_t a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\". This intrinsic may generate the \"vpbroadcastq\"."
    },
    {
        "name": "_mm256_set1_pd",
        "full_name": "__m256d _mm256_set1_pd(double a);",
        "description": "Broadcast double-precision (64-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_set1_ps",
        "full_name": "__m256 _mm256_set1_ps(float a);",
        "description": "Broadcast single-precision (32-bit) floating-point value \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_div_epi8",
        "full_name": "__m256i _mm256_div_epi8(__m256i a, __m256i b);",
        "description": "Divide packed signed 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epi16",
        "full_name": "__m256i _mm256_div_epi16(__m256i a, __m256i b);",
        "description": "Divide packed signed 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epu8",
        "full_name": "__m256i _mm256_div_epu8(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_epu16",
        "full_name": "__m256i _mm256_div_epu16(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_div_ps",
        "full_name": "__m256 _mm256_div_ps(__m256 a, __m256 b);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_div_pd",
        "full_name": "__m256d _mm256_div_pd(__m256d a, __m256d b);",
        "description": "Divide packed double-precision (64-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_epi8",
        "full_name": "__m256i _mm256_add_epi8(__m256i a, __m256i b);",
        "description": "Add packed 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_epi16",
        "full_name": "__m256i _mm256_add_epi16(__m256i a, __m256i b);",
        "description": "Add packed 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_epi32",
        "full_name": "__m256i _mm256_add_epi32(__m256i a, __m256i b);",
        "description": "Add packed 32-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_epi64",
        "full_name": "__m256i _mm256_add_epi64(__m256i a, __m256i b);",
        "description": "Add packed 64-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_adds_epi8",
        "full_name": "__m256i _mm256_adds_epi8(__m256i a, __m256i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_adds_epi16",
        "full_name": "__m256i _mm256_adds_epi16(__m256i a, __m256i b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_adds_epu8",
        "full_name": "__m256i _mm256_adds_epu8(__m256i a, __m256i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_adds_epu16",
        "full_name": "__m256i _mm256_adds_epu16(__m256i a, __m256i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_ps",
        "full_name": "__m256 _mm256_add_ps(__m256 a, __m256 b);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_add_pd",
        "full_name": "__m256d _mm256_add_pd(__m256d a, __m256d b);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_addsub_ps",
        "full_name": "__m256 _mm256_addsub_ps(__m256 a, __m256 b);",
        "description": "Alternately add and subtract packed single-precision (32-bit) floating-point elements in \"a\" to/from packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_addsub_pd",
        "full_name": "__m256d _mm256_addsub_pd(__m256d a, __m256d b);",
        "description": "Alternately add and subtract packed double-precision (64-bit) floating-point elements in \"a\" to/from packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_epi16",
        "full_name": "__m256i _mm256_sub_epi16(__m256i a, __m256i b);",
        "description": "Subtract packed 16-bit integers in \"b\" from packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_epi32",
        "full_name": "__m256i _mm256_sub_epi32(__m256i a, __m256i b);",
        "description": "Subtract packed 32-bit integers in \"b\" from packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_epi64",
        "full_name": "__m256i _mm256_sub_epi64(__m256i a, __m256i b);",
        "description": "Subtract packed 64-bit integers in \"b\" from packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_epi8",
        "full_name": "__m256i _mm256_sub_epi8(__m256i a, __m256i b);",
        "description": "Subtract packed 8-bit integers in \"b\" from packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_pd",
        "full_name": "__m256d _mm256_sub_pd(__m256d a, __m256d b);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sub_ps",
        "full_name": "__m256 _mm256_sub_ps(__m256 a, __m256 b);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_subs_epi16",
        "full_name": "__m256i _mm256_subs_epi16(__m256i a, __m256i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_subs_epi8",
        "full_name": "__m256i _mm256_subs_epi8(__m256i a, __m256i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_subs_epu16",
        "full_name": "__m256i _mm256_subs_epu16(__m256i a, __m256i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_subs_epu8",
        "full_name": "__m256i _mm256_subs_epu8(__m256i a, __m256i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mul_epi32",
        "full_name": "__m256i _mm256_mul_epi32(__m256i a, __m256i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\"."
    },
    {
        "name": "_mm256_mul_epu32",
        "full_name": "__m256i _mm256_mul_epu32(__m256i a, __m256i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\"."
    },
    {
        "name": "_mm256_mul_pd",
        "full_name": "__m256d _mm256_mul_pd(__m256d a, __m256d b);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mul_ps",
        "full_name": "__m256 _mm256_mul_ps(__m256 a, __m256 b);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mullo_epi16",
        "full_name": "__m256i _mm256_mullo_epi16(__m256i a, __m256i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm256_mullo_epi32",
        "full_name": "__m256i _mm256_mullo_epi32(__m256i a, __m256i b);",
        "description": "Multiply the packed signed 32-bit integers in \"a\" and \"b\", producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm256_zeroupper",
        "full_name": "void _mm256_zeroupper(void);",
        "description": "Zero the upper 128 bits of all YMM registers; the lower 128-bits of the registers are unmodified."
    },
    {
        "name": "_mm256_sll_epi64",
        "full_name": "__m256i _mm256_sll_epi64(__m256i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_srli_epi64",
        "full_name": "__m256i _mm256_srli_epi64(__m256i a, int imm8);",
        "description": "Shift packed 64-bit integers in \"a\" right by \"imm8\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_unpackhi_epi8",
        "full_name": "__m256i _mm256_unpackhi_epi8(__m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_unpacklo_epi8",
        "full_name": "__m256i _mm256_unpacklo_epi8(__m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_and_si256",
        "full_name": "__m256i _mm256_and_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of 256 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_andnot_si256",
        "full_name": "__m256i _mm256_andnot_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise NOT of 256 bits (representing integer data) in \"a\" and then AND with \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_xor_si256",
        "full_name": "__m256i _mm256_xor_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise XOR of 256 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_or_ps",
        "full_name": "__m256 _mm256_or_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_or_pd",
        "full_name": "__m256d _mm256_or_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_movemask_epi8",
        "full_name": "int _mm256_movemask_epi8(__m256i a);",
        "description": "Create mask from the most significant bit of each 8-bit element in \"a\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_movemask_ps",
        "full_name": "int _mm256_movemask_ps(__m256 a);",
        "description": "Set each bit of mask \"dst\" based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in \"a\"."
    },
    {
        "name": "_mm256_testz_si256",
        "full_name": "int _mm256_testz_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of 256 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm256_or_si256",
        "full_name": "__m256i _mm256_or_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise OR of 256 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extracti128_si256",
        "full_name": "__m128i _mm256_extracti128_si256(__m256i a, const int imm8);",
        "description": "Extract 128 bits (composed of integer data) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extractf128_ps",
        "full_name": "__m128 _mm256_extractf128_ps(__m256 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extractf128_pd",
        "full_name": "__m128d _mm256_extractf128_pd(__m256d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_permute4x64_epi64",
        "full_name": "__m256i _mm256_permute4x64_epi64(__m256i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_permute2f128_si256",
        "full_name": "__m256i _mm256_permute2f128_si256(__m256i a, __m256i b, int imm8);",
        "description": "Shuffle 128-bits (composed of integer data) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_maskload_epi32",
        "full_name": "__m256i _mm256_maskload_epi32(int const * mem_addr, __m256i mask);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using \"mask\" (elements are zeroed out when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm256_broadcastq_epi64",
        "full_name": "__m256i _mm256_broadcastq_epi64(__m128i a);",
        "description": "Broadcast the low packed 64-bit integer from \"a\" to all elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcastsi128_si256",
        "full_name": "__m256i _mm256_broadcastsi128_si256(__m128i a);",
        "description": "Broadcast 128 bits of integer data from \"a\" to all 128-bit lanes in \"dst\"."
    },
    {
        "name": "_mm256_castpd128_pd256",
        "full_name": "__m256d _mm256_castpd128_pd256(__m128d a);",
        "description": "Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castpd256_pd128",
        "full_name": "__m128d _mm256_castpd256_pd128(__m256d a);",
        "description": "Cast vector of type __m256d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castps128_ps256",
        "full_name": "__m256 _mm256_castps128_ps256(__m128 a);",
        "description": "Cast vector of type __m128 to type __m256; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castps256_ps128",
        "full_name": "__m128 _mm256_castps256_ps128(__m256 a);",
        "description": "Cast vector of type __m256 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castsi128_si256",
        "full_name": "__m256i _mm256_castsi128_si256(__m128i a);",
        "description": "Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castsi256_ps",
        "full_name": "__m256 _mm256_castsi256_ps(__m256i a);",
        "description": "Cast vector of type __m256i to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castsi256_pd",
        "full_name": "__m256d _mm256_castsi256_pd(__m256i a);",
        "description": "Cast vector of type __m256i to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castsi256_si128",
        "full_name": "__m128i _mm256_castsi256_si128(__m256i a);",
        "description": "Cast vector of type __m256i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castpd_ps",
        "full_name": "__m256 _mm256_castpd_ps(__m256d a);",
        "description": "Cast vector of type __m256d to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castpd_si256",
        "full_name": "__m256i _mm256_castpd_si256(__m256d a);",
        "description": "Cast vector of type __m256d to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castps_pd",
        "full_name": "__m256d _mm256_castps_pd(__m256 a);",
        "description": "Cast vector of type __m256 to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_castps_si256",
        "full_name": "__m256i _mm256_castps_si256(__m256 a);",
        "description": "Cast vector of type __m256 to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_cvtepi32_ps",
        "full_name": "__m256 _mm256_cvtepi32_ps(__m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_shuffle_epi8",
        "full_name": "__m256i _mm256_shuffle_epi8(__m256i a, __m256i b);",
        "description": "Shuffle 8-bit integers in \"a\" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_alignr_epi8",
        "full_name": "__m256i _mm256_alignr_epi8(__m256i a, __m256i b, const int count);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"count\" bytes, and store the low 16 bytes in \"dst\"."
    },
    {
        "name": "_mm256_blendv_pd",
        "full_name": "__m256d _mm256_blendv_pd(__m256d a, __m256d b, __m256d mask);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_blendv_ps",
        "full_name": "__m256 _mm256_blendv_ps(__m256 a, __m256 b, __m256 mask);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using \"mask\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_blend_ps",
        "full_name": "__m256 _mm256_blend_ps(__m256 a, __m256 b, const int imm8);",
        "description": "Blend packed single-precision (32-bit) floating-point elements from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_blend_pd",
        "full_name": "__m256d _mm256_blend_pd(__m256d a, __m256d b, const int imm8);",
        "description": "Blend packed double-precision (64-bit) floating-point elements from \"a\" and \"b\" using control mask \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_inserti128_si256",
        "full_name": "__m256i _mm256_inserti128_si256(__m256i a, __m128i b, const int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of integer data) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_insertf128_pd",
        "full_name": "__m256d _mm256_insertf128_pd(__m256d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_insertf128_ps",
        "full_name": "__m256 _mm256_insertf128_ps(__m256 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_insert_epi32",
        "full_name": "__m256i _mm256_insert_epi32(__m256i a, __int32 i, const int index);",
        "description": "Copy \"a\" to \"dst\", and insert the 32-bit integer \"i\" into \"dst\" at the location specified by \"index\"."
    },
    {
        "name": "_mm256_insert_epi64",
        "full_name": "__m256i _mm256_insert_epi64(__m256i a, __int64 i, const int index);",
        "description": "Copy \"a\" to \"dst\", and insert the 64-bit integer \"i\" into \"dst\" at the location specified by \"index\"."
    },
    {
        "name": "_mm256_cmpgt_epi32",
        "full_name": "__m256i _mm256_cmpgt_epi32(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpeq_epi32",
        "full_name": "__m256i _mm256_cmpeq_epi32(__m256i a, __m256i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmpeq_epi8",
        "full_name": "__m256i _mm256_cmpeq_epi8(__m256i a, __m256i b);",
        "description": "Compare packed 8-bit integers in \"a\" and \"b\" for equality, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmp_pd",
        "full_name": "__m256d _mm256_cmp_pd(__m256d a, __m256d b, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cmp_ps",
        "full_name": "__m256 _mm256_cmp_ps(__m256 a, __m256 b, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_load_epi32",
        "full_name": "__m256i _mm256_load_epi32(void const * mem_addr);",
        "description": "Load 256-bits (composed of 8 packed 32-bit integers) from memory into \"dst\". \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_load_epi64",
        "full_name": "__m256i _mm256_load_epi64(void const * mem_addr);",
        "description": "Load 256-bits (composed of 4 packed 64-bit integers) from memory into \"dst\". \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_load_pd",
        "full_name": "__m256d _mm256_load_pd(double const * mem_addr);",
        "description": "Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into \"dst\". \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_load_ps",
        "full_name": "__m256 _mm256_load_ps(float const * mem_addr);",
        "description": "Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into \"dst\". \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_store_epi32",
        "full_name": "void _mm256_store_epi32(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 8 packed 32-bit integers) from \"a\" into memory. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_store_epi64",
        "full_name": "void _mm256_store_epi64(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 4 packed 64-bit integers) from \"a\" into memory. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_store_pd",
        "full_name": "void _mm256_store_pd(double *mem_addr, __m256d a);",
        "description": "Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\" into memory. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_store_ps",
        "full_name": "void _mm256_store_ps(float *mem_addr, __m256 a);",
        "description": "Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\" into memory. \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_max_epi32",
        "full_name": "__m256i _mm256_max_epi32(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\"."
    },
    {
        "name": "_mm256_packs_epi32",
        "full_name": "__m256i _mm256_packs_epi32(__m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_zextsi128_si256",
        "full_name": "__m256i _mm256_zextsi128_si256(__m128i a);",
        "description": "Cast vector of type __m128i to type __m256i; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_zextpd128_pd256",
        "full_name": "__m256d _mm256_zextpd128_pd256(__m128d a);",
        "description": "Cast vector of type __m128d to type __m256d; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_zextps128_ps256",
        "full_name": "__m256 _mm256_zextps128_ps256(__m128 a);",
        "description": "Cast vector of type __m128 to type __m256; the upper 128 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency."
    },
    {
        "name": "_mm256_undefined_si256",
        "full_name": "__m256i _mm256_undefined_si256(void);",
        "description": "Return vector of type __m256i with undefined elements."
    },
    {
        "name": "_mm256_undefined_pd",
        "full_name": "__m256d _mm256_undefined_pd(void);",
        "description": "Return vector of type __m256d with undefined elements."
    },
    {
        "name": "_mm256_undefined_ps",
        "full_name": "__m256 _mm256_undefined_ps(void);",
        "description": "Return vector of type __m256 with undefined elements."
    },
    {
        "name": "_mm512_cmp_ps_mask",
        "full_name": "__mmask16 _mm512_cmp_ps_mask(__m512 a, __m512 b, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmp_pd_mask",
        "full_name": "__mmask8 _mm512_cmp_pd_mask(__m512d a, __m512d b, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpeq_ps_mask",
        "full_name": "__mmask16 _mm512_cmpeq_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpeq_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epi16_mask",
        "full_name": "__mmask8 _mm_cmpge_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epi16_mask",
        "full_name": "__mmask16 _mm256_cmpge_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpge_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epi16_mask",
        "full_name": "__mmask32 _mm512_cmpge_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpge_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epi32_mask",
        "full_name": "__mmask8 _mm_cmpge_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epi32_mask",
        "full_name": "__mmask8 _mm256_cmpge_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpge_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epi32_mask",
        "full_name": "__mmask16 _mm512_cmpge_epi32_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpge_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm_cmpge_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm256_cmpge_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpge_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm512_cmpge_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpge_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epi8_mask",
        "full_name": "__mmask16 _mm_cmpge_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmpge_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epi8_mask",
        "full_name": "__mmask32 _mm256_cmpge_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpge_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epi8_mask",
        "full_name": "__mmask64 _mm512_cmpge_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpge_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epu16_mask",
        "full_name": "__mmask8 _mm_cmpge_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epu16_mask",
        "full_name": "__mmask16 _mm256_cmpge_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpge_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epu16_mask",
        "full_name": "__mmask32 _mm512_cmpge_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpge_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epu32_mask",
        "full_name": "__mmask8 _mm_cmpge_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epu32_mask",
        "full_name": "__mmask8 _mm256_cmpge_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpge_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epu32_mask",
        "full_name": "__mmask16 _mm512_cmpge_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpge_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm_cmpge_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmpge_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm256_cmpge_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpge_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm512_cmpge_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpge_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpge_epu8_mask",
        "full_name": "__mmask16 _mm_cmpge_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpge_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmpge_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpge_epu8_mask",
        "full_name": "__mmask32 _mm256_cmpge_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpge_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpge_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpge_epu8_mask",
        "full_name": "__mmask64 _mm512_cmpge_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpge_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpge_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epi16_mask",
        "full_name": "__mmask8 _mm_cmpgt_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epi16_mask",
        "full_name": "__mmask16 _mm256_cmpgt_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpgt_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epi16_mask",
        "full_name": "__mmask32 _mm512_cmpgt_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpgt_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epi32_mask",
        "full_name": "__mmask8 _mm_cmpgt_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epi32_mask",
        "full_name": "__mmask8 _mm256_cmpgt_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpgt_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmpgt_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpgt_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm_cmpgt_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm256_cmpgt_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpgt_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_ps_mask",
        "full_name": "__mmask16 _mm512_cmpneq_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpneq_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_exp10_ps",
        "full_name": "__m256 _mm256_exp10_ps(__m256 a);",
        "description": "Compute the exponential value of 10 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_exp2_ps",
        "full_name": "__m128 _mm_exp2_ps(__m128 a);",
        "description": "Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_exp2_ps",
        "full_name": "__m256 _mm256_exp2_ps(__m256 a);",
        "description": "Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_exp2_ps",
        "full_name": "__m512 _mm512_exp2_ps(__m512 a);",
        "description": "Compute the exponential value of 2 raised to the power of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_extractf128_si256",
        "full_name": "__m128i _mm256_extractf128_si256(__m256i a, const int imm8);",
        "description": "Extract 128 bits (composed of integer data) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extractf32x4_ps",
        "full_name": "__m128 _mm256_extractf32x4_ps(__m256 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_mask_extractf32x4_ps",
        "full_name": "__m128 _mm256_mask_extractf32x4_ps(__m128 src, __mmask8 k, __m256 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_extractf32x4_ps",
        "full_name": "__m128 _mm256_maskz_extractf32x4_ps(__mmask8 k, __m256 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_extractf32x4_ps",
        "full_name": "__m128 _mm512_extractf32x4_ps(__m512 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_extractf32x4_ps",
        "full_name": "__m128 _mm512_mask_extractf32x4_ps(__m128 src, __mmask8 k, __m512 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extractf32x4_ps",
        "full_name": "__m128 _mm512_maskz_extractf32x4_ps(__mmask8 k, __m512 a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_extractf32x8_ps",
        "full_name": "__m256 _mm512_mask_extractf32x8_ps(__m256 src, __mmask8 k, __m512 a, const int imm8);",
        "description": "Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extractf32x8_ps",
        "full_name": "__m256 _mm512_maskz_extractf32x8_ps(__mmask8 k, __m512 a, const int imm8);",
        "description": "Extract 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_extractf64x2_pd",
        "full_name": "__m128d _mm256_extractf64x2_pd(__m256d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_mask_extractf64x2_pd",
        "full_name": "__m128d _mm256_mask_extractf64x2_pd(__m128d src, __mmask8 k, __m256d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_extractf64x2_pd",
        "full_name": "__m128d _mm256_maskz_extractf64x2_pd(__mmask8 k, __m256d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_extractf64x2_pd",
        "full_name": "__m128d _mm512_extractf64x2_pd(__m512d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_extractf64x2_pd",
        "full_name": "__m128d _mm512_mask_extractf64x2_pd(__m128d src, __mmask8 k, __m512d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extractf64x2_pd",
        "full_name": "__m128d _mm512_maskz_extractf64x2_pd(__mmask8 k, __m512d a, const int imm8);",
        "description": "Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_extractf64x4_pd",
        "full_name": "__m256d _mm512_mask_extractf64x4_pd(__m256d src, __mmask8 k, __m512d a, const int imm8);",
        "description": "Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extractf64x4_pd",
        "full_name": "__m256d _mm512_maskz_extractf64x4_pd(__mmask8 k, __m512d a, const int imm8);",
        "description": "Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_extracti32x4_epi32",
        "full_name": "__m128i _mm256_extracti32x4_epi32(__m256i a, const int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_blsi_u32",
        "full_name": "unsigned int _blsi_u32(unsigned int a);",
        "description": "Extract the lowest set bit from unsigned 32-bit integer \"a\" and set the corresponding bit in \"dst\". All other bits in \"dst\" are zeroed, and all bits are zeroed if no bits are set in \"a\"."
    },
    {
        "name": "_blsi_u64",
        "full_name": "unsigned __int64 _blsi_u64(unsigned __int64 a);",
        "description": "Extract the lowest set bit from unsigned 64-bit integer \"a\" and set the corresponding bit in \"dst\". All other bits in \"dst\" are zeroed, and all bits are zeroed if no bits are set in \"a\"."
    },
    {
        "name": "_blsmsk_u32",
        "full_name": "unsigned int _blsmsk_u32(unsigned int a);",
        "description": "Set all the lower bits of \"dst\" up to and including the lowest set bit in unsigned 32-bit integer \"a\"."
    },
    {
        "name": "_blsmsk_u64",
        "full_name": "unsigned __int64 _blsmsk_u64(unsigned __int64 a);",
        "description": "Set all the lower bits of \"dst\" up to and including the lowest set bit in unsigned 64-bit integer \"a\"."
    },
    {
        "name": "_blsr_u32",
        "full_name": "unsigned int _blsr_u32(unsigned int a);",
        "description": "Copy all bits from unsigned 32-bit integer \"a\" to \"dst\", and reset (set to 0) the bit in \"dst\" that corresponds to the lowest set bit in \"a\"."
    },
    {
        "name": "_blsr_u64",
        "full_name": "unsigned __int64 _blsr_u64(unsigned __int64 a);",
        "description": "Copy all bits from unsigned 64-bit integer \"a\" to \"dst\", and reset (set to 0) the bit in \"dst\" that corresponds to the lowest set bit in \"a\"."
    },
    {
        "name": "_mm_cmpord_pd",
        "full_name": "__m128d _mm_cmpord_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cmpord_pd_mask",
        "full_name": "__mmask8 _mm512_cmpord_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpord_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpord_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpord_ps",
        "full_name": "__m128 _mm_cmpord_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpord_sd",
        "full_name": "__m128d _mm_cmpord_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpord_ss",
        "full_name": "__m128 _mm_cmpord_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmpunord_pd",
        "full_name": "__m128d _mm_cmpunord_pd(__m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cmpunord_pd_mask",
        "full_name": "__mmask8 _mm512_cmpunord_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpunord_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpunord_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpunord_ps",
        "full_name": "__m128 _mm_cmpunord_ps(__m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmpunord_sd",
        "full_name": "__m128d _mm_cmpunord_sd(__m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmpunord_ss",
        "full_name": "__m128 _mm_cmpunord_ss(__m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm512_kand",
        "full_name": "__mmask16 _mm512_kand(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise AND of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kand_mask16",
        "full_name": "__mmask16 _kand_mask16(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise AND of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kand_mask32",
        "full_name": "__mmask32 _kand_mask32(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise AND of 32-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kand_mask64",
        "full_name": "__mmask64 _kand_mask64(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise AND of 64-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kand_mask8",
        "full_name": "__mmask8 _kand_mask8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise AND of 8-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm512_kandn",
        "full_name": "__mmask16 _mm512_kandn(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise NOT of 16-bit masks \"a\" and then AND with \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kandn_mask16",
        "full_name": "__mmask16 _kandn_mask16(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise NOT of 16-bit masks \"a\" and then AND with \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kandn_mask32",
        "full_name": "__mmask32 _kandn_mask32(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise NOT of 32-bit masks \"a\" and then AND with \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kandn_mask64",
        "full_name": "__mmask64 _kandn_mask64(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise NOT of 64-bit masks \"a\" and then AND with \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kandn_mask8",
        "full_name": "__mmask8 _kandn_mask8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise NOT of 8-bit masks \"a\" and then AND with \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm512_knot",
        "full_name": "__mmask16 _mm512_knot(__mmask16 a);",
        "description": "Compute the bitwise NOT of 16-bit mask \"a\", and store the result in \"k\"."
    },
    {
        "name": "_knot_mask16",
        "full_name": "__mmask16 _knot_mask16(__mmask16 a);",
        "description": "Compute the bitwise NOT of 16-bit mask \"a\", and store the result in \"k\"."
    },
    {
        "name": "_knot_mask32",
        "full_name": "__mmask32 _knot_mask32(__mmask32 a);",
        "description": "Compute the bitwise NOT of 32-bit mask \"a\", and store the result in \"k\"."
    },
    {
        "name": "_knot_mask64",
        "full_name": "__mmask64 _knot_mask64(__mmask64 a);",
        "description": "Compute the bitwise NOT of 64-bit mask \"a\", and store the result in \"k\"."
    },
    {
        "name": "_knot_mask8",
        "full_name": "__mmask8 _knot_mask8(__mmask8 a);",
        "description": "Compute the bitwise NOT of 8-bit mask \"a\", and store the result in \"k\"."
    },
    {
        "name": "_mm512_kor",
        "full_name": "__mmask16 _mm512_kor(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise OR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kor_mask16",
        "full_name": "__mmask16 _kor_mask16(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise OR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kor_mask32",
        "full_name": "__mmask32 _kor_mask32(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise OR of 32-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kor_mask64",
        "full_name": "__mmask64 _kor_mask64(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise OR of 64-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kor_mask8",
        "full_name": "__mmask8 _kor_mask8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise OR of 8-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm512_kxor",
        "full_name": "__mmask16 _mm512_kxor(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise XOR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_bit_scan_forward",
        "full_name": "int _bit_scan_forward(int a);",
        "description": "Set \"dst\" to the index of the lowest set bit in 32-bit integer \"a\". If no bits are set in \"a\" then \"dst\" is undefined."
    },
    {
        "name": "_bit_scan_reverse",
        "full_name": "int _bit_scan_reverse(int a);",
        "description": "Set \"dst\" to the index of the highest set bit in 32-bit integer \"a\". If no bits are set in \"a\" then \"dst\" is undefined."
    },
    {
        "name": "_BitScanForward",
        "full_name": "unsigned char _BitScanForward(unsigned __int32 *index, unsigned __int32 a);",
        "description": "Set \"index\" to the index of the lowest set bit in 32-bit integer \"a\". If no bits are set in \"a\", then \"index\" is undefined and \"dst\" is set to 0, otherwise \"dst\" is set to 1."
    },
    {
        "name": "_BitScanForward64",
        "full_name": "unsigned char _BitScanForward64(unsigned __int32 *index, unsigned __int64 a);",
        "description": "Set \"index\" to the index of the lowest set bit in 64-bit integer \"a\". If no bits are set in \"a\", then \"index\" is undefined and \"dst\" is set to 0, otherwise \"dst\" is set to 1."
    },
    {
        "name": "_BitScanReverse",
        "full_name": "unsigned char _BitScanReverse(unsigned __int32 *index, unsigned __int32 a);",
        "description": "Set \"index\" to the index of the highest set bit in 32-bit integer \"a\". If no bits are set in \"a\", then \"index\" is undefined and \"dst\" is set to 0, otherwise \"dst\" is set to 1."
    },
    {
        "name": "_BitScanReverse64",
        "full_name": "unsigned char _BitScanReverse64(unsigned __int32 *index, unsigned __int64 a);",
        "description": "Set \"index\" to the index of the highest set bit in 64-bit integer \"a\". If no bits are set in \"a\", then \"index\" is undefined and \"dst\" is set to 0, otherwise \"dst\" is set to 1."
    },
    {
        "name": "_kortest_mask16_u8",
        "full_name": "unsigned char _kortest_mask16_u8(__mmask16 a, __mmask16 b, unsigned char *all_ones);",
        "description": "Compute the bitwise OR of 16-bit masks \"a\" and \"b\". If the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". If the result is all ones, store 1 in \"all_ones\", otherwise store 0 in \"all_ones\"."
    },
    {
        "name": "_kortest_mask32_u8",
        "full_name": "unsigned char _kortest_mask32_u8(__mmask32 a, __mmask32 b, unsigned char *all_ones);",
        "description": "Compute the bitwise OR of 32-bit masks \"a\" and \"b\". If the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". If the result is all ones, store 1 in \"all_ones\", otherwise store 0 in \"all_ones\"."
    },
    {
        "name": "_kortest_mask64_u8",
        "full_name": "unsigned char _kortest_mask64_u8(__mmask64 a, __mmask64 b, unsigned char *all_ones);",
        "description": "Compute the bitwise OR of 64-bit masks \"a\" and \"b\". If the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". If the result is all ones, store 1 in \"all_ones\", otherwise store 0 in \"all_ones\"."
    },
    {
        "name": "_kortest_mask8_u8",
        "full_name": "unsigned char _kortest_mask8_u8(__mmask8 a, __mmask8 b, unsigned char *all_ones);",
        "description": "Compute the bitwise OR of 8-bit masks \"a\" and \"b\". If the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". If the result is all ones, store 1 in \"all_ones\", otherwise store 0 in \"all_ones\"."
    },
    {
        "name": "_kortestc_mask16_u8",
        "full_name": "unsigned char _kortestc_mask16_u8(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise OR of 16-bit masks \"a\" and \"b\". If the result is all ones, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestc_mask32_u8",
        "full_name": "unsigned char _kortestc_mask32_u8(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise OR of 32-bit masks \"a\" and \"b\". If the result is all ones, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestc_mask64_u8",
        "full_name": "unsigned char _kortestc_mask64_u8(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise OR of 64-bit masks \"a\" and \"b\". If the result is all ones, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestc_mask8_u8",
        "full_name": "unsigned char _kortestc_mask8_u8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise OR of 8-bit masks \"a\" and \"b\". If the result is all ones, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestz_mask16_u8",
        "full_name": "unsigned char _kortestz_mask16_u8(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise OR of 16-bit masks \"a\" and \"b\". If the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestz_mask32_u8",
        "full_name": "unsigned char _kortestz_mask32_u8(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise OR of 32-bit masks \"a\" and \"b\". If the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestz_mask64_u8",
        "full_name": "unsigned char _kortestz_mask64_u8(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise OR of 64-bit masks \"a\" and \"b\". If the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_kortestz_mask8_u8",
        "full_name": "unsigned char _kortestz_mask8_u8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise OR of 8-bit masks \"a\" and \"b\". If the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktest_mask16_u8",
        "full_name": "unsigned char _ktest_mask16_u8(__mmask16 a, __mmask16 b, unsigned char *and_not);",
        "description": "Compute the bitwise AND of 16-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". Compute the bitwise NOT of \"a\" and then AND with \"b\", if the result is all zeros, store 1 in \"and_not\", otherwise store 0 in \"and_not\"."
    },
    {
        "name": "_ktest_mask32_u8",
        "full_name": "unsigned char _ktest_mask32_u8(__mmask32 a, __mmask32 b, unsigned char *and_not);",
        "description": "Compute the bitwise AND of 32-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". Compute the bitwise NOT of \"a\" and then AND with \"b\", if the result is all zeros, store 1 in \"and_not\", otherwise store 0 in \"and_not\"."
    },
    {
        "name": "_ktest_mask64_u8",
        "full_name": "unsigned char _ktest_mask64_u8(__mmask64 a, __mmask64 b, unsigned char *and_not);",
        "description": "Compute the bitwise AND of 64-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". Compute the bitwise NOT of \"a\" and then AND with \"b\", if the result is all zeros, store 1 in \"and_not\", otherwise store 0 in \"and_not\"."
    },
    {
        "name": "_ktest_mask8_u8",
        "full_name": "unsigned char _ktest_mask8_u8(__mmask8 a, __mmask8 b, unsigned char *and_not);",
        "description": "Compute the bitwise AND of 8-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\". Compute the bitwise NOT of \"a\" and then AND with \"b\", if the result is all zeros, store 1 in \"and_not\", otherwise store 0 in \"and_not\"."
    },
    {
        "name": "_ktestc_mask16_u8",
        "full_name": "unsigned char _ktestc_mask16_u8(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise NOT of 16-bit mask \"a\" and then AND with \"b\", if the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestc_mask32_u8",
        "full_name": "unsigned char _ktestc_mask32_u8(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise NOT of 32-bit mask \"a\" and then AND with \"b\", if the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestc_mask64_u8",
        "full_name": "unsigned char _ktestc_mask64_u8(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise NOT of 64-bit mask \"a\" and then AND with \"b\", if the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestc_mask8_u8",
        "full_name": "unsigned char _ktestc_mask8_u8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise NOT of 8-bit mask \"a\" and then AND with \"b\", if the result is all zeroes, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestz_mask16_u8",
        "full_name": "unsigned char _ktestz_mask16_u8(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise AND of 16-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestz_mask32_u8",
        "full_name": "unsigned char _ktestz_mask32_u8(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise AND of 32-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestz_mask64_u8",
        "full_name": "unsigned char _ktestz_mask64_u8(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise AND of 64-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_ktestz_mask8_u8",
        "full_name": "unsigned char _ktestz_mask8_u8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise AND of 8-bit masks \"a\" and \"b\", and if the result is all zeros, store 1 in \"dst\", otherwise store 0 in \"dst\"."
    },
    {
        "name": "_mm_getexp_pd",
        "full_name": "__m128d _mm_getexp_pd(__m128d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm_mask_getexp_pd",
        "full_name": "__m128d _mm_mask_getexp_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm_maskz_getexp_pd",
        "full_name": "__m128d _mm_maskz_getexp_pd(__mmask8 k, __m128d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_getexp_pd",
        "full_name": "__m256d _mm256_getexp_pd(__m256d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_mask_getexp_pd",
        "full_name": "__m256d _mm256_mask_getexp_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_maskz_getexp_pd",
        "full_name": "__m256d _mm256_maskz_getexp_pd(__mmask8 k, __m256d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_getexp_pd",
        "full_name": "__m512d _mm512_getexp_pd(__m512d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_mask_getexp_pd",
        "full_name": "__m512d _mm512_mask_getexp_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_maskz_getexp_pd",
        "full_name": "__m512d _mm512_maskz_getexp_pd(__mmask8 k, __m512d a);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm_getexp_ps",
        "full_name": "__m128 _mm_getexp_ps(__m128 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm_mask_getexp_ps",
        "full_name": "__m128 _mm_mask_getexp_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm_maskz_getexp_ps",
        "full_name": "__m128 _mm_maskz_getexp_ps(__mmask8 k, __m128 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_getexp_ps",
        "full_name": "__m256 _mm256_getexp_ps(__m256 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_mask_getexp_ps",
        "full_name": "__m256 _mm256_mask_getexp_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm256_maskz_getexp_ps",
        "full_name": "__m256 _mm256_maskz_getexp_ps(__mmask8 k, __m256 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_getexp_ps",
        "full_name": "__m512 _mm512_getexp_ps(__m512 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_mask_getexp_ps",
        "full_name": "__m512 _mm512_mask_getexp_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_maskz_getexp_ps",
        "full_name": "__m512 _mm512_maskz_getexp_ps(__mmask16 k, __m512 a);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element."
    },
    {
        "name": "_mm512_getexp_round_pd",
        "full_name": "__m512d _mm512_getexp_round_pd(__m512d a, const int sae);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_mask_getexp_round_pd",
        "full_name": "__m512d _mm512_mask_getexp_round_pd(__m512d src, __mmask8 k, __m512d a, int sae);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_maskz_getexp_round_pd",
        "full_name": "__m512d _mm512_maskz_getexp_round_pd(__mmask8 k, __m512d a, int sae);",
        "description": "Convert the exponent of each packed double-precision (64-bit) floating-point element in \"a\" to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_getexp_round_ps",
        "full_name": "__m512 _mm512_getexp_round_ps(__m512 a, int sae);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_mask_getexp_round_ps",
        "full_name": "__m512 _mm512_mask_getexp_round_ps(__m512 src, __mmask16 k, __m512 a, int sae);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_maskz_getexp_round_ps",
        "full_name": "__m512 _mm512_maskz_getexp_round_ps(__mmask16 k, __m512 a, int sae);",
        "description": "Convert the exponent of each packed single-precision (32-bit) floating-point element in \"a\" to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"floor(log2(x))\" for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_getexp_sd",
        "full_name": "__m128d _mm_getexp_sd(__m128d a, __m128d b);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_mask_getexp_sd",
        "full_name": "__m128d _mm_mask_getexp_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_maskz_getexp_sd",
        "full_name": "__m128d _mm_maskz_getexp_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_getexp_round_sd",
        "full_name": "__m128d _mm_getexp_round_sd(__m128d a, __m128d b, int sae);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_mask_getexp_round_sd",
        "full_name": "__m128d _mm_mask_getexp_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_maskz_getexp_round_sd",
        "full_name": "__m128d _mm_maskz_getexp_round_sd(__mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Convert the exponent of the lower double-precision (64-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_getexp_ss",
        "full_name": "__m128 _mm_getexp_ss(__m128 a, __m128 b);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_mask_getexp_ss",
        "full_name": "__m128 _mm_mask_getexp_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_maskz_getexp_ss",
        "full_name": "__m128 _mm_maskz_getexp_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element."
    },
    {
        "name": "_mm_getexp_round_ss",
        "full_name": "__m128 _mm_getexp_round_ss(__m128 a, __m128 b, int sae);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_mask_getexp_round_ss",
        "full_name": "__m128 _mm_mask_getexp_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm_maskz_getexp_round_ss",
        "full_name": "__m128 _mm_maskz_getexp_round_ss(__mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Convert the exponent of the lower single-precision (32-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"floor(log2(x))\" for the lower element.\n\tExceptions can be suppressed by passing _MM_FROUND_NO_EXC in the \"sae\" parameter."
    },
    {
        "name": "_mm512_cvtepu64_ps",
        "full_name": "__m256 _mm512_cvtepu64_ps(__m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_cvtmask8_u32",
        "full_name": "unsigned int _cvtmask8_u32(__mmask8 a);",
        "description": "Convert 8-bit mask \"a\" into an integer value, and store the result in \"dst\"."
    },
    {
        "name": "_mm_clog_ps",
        "full_name": "__m128 _mm_clog_ps(__m128 a);",
        "description": "Compute the natural logarithm of packed complex numbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm256_clog_ps",
        "full_name": "__m256 _mm256_clog_ps(__m256 a);",
        "description": "Compute the natural logarithm of packed complex numbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm_mask_cvtpd_epi32",
        "full_name": "__m128i _mm_mask_cvtpd_epi32(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtpd_epi32",
        "full_name": "__m128i _mm_maskz_cvtpd_epi32(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtpd_epi32",
        "full_name": "__m128i _mm256_cvtpd_epi32(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtpd_epu32",
        "full_name": "__m128i _mm_cvtpd_epu32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtpd_epu32",
        "full_name": "__m128i _mm_mask_cvtpd_epu32(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtpd_epu32",
        "full_name": "__m128i _mm_maskz_cvtpd_epu32(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtpd_epu32",
        "full_name": "__m128i _mm256_cvtpd_epu32(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtpd_epu32",
        "full_name": "__m128i _mm256_mask_cvtpd_epu32(__m128i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtpd_epu32",
        "full_name": "__m128i _mm256_maskz_cvtpd_epu32(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_epu32",
        "full_name": "__m256i _mm512_cvtpd_epu32(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpd_epu32",
        "full_name": "__m256i _mm512_mask_cvtpd_epu32(__m256i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtpd_epi32",
        "full_name": "__m128i _mm256_mask_cvtpd_epi32(__m128i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtpd_epi32",
        "full_name": "__m128i _mm256_maskz_cvtpd_epi32(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_epi32",
        "full_name": "__m256i _mm512_cvtpd_epi32(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpd_epi32",
        "full_name": "__m256i _mm512_mask_cvtpd_epi32(__m256i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtpd_epi32",
        "full_name": "__m256i _mm512_maskz_cvtpd_epi32(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtpd_epu32",
        "full_name": "__m256i _mm512_maskz_cvtpd_epu32(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cos_pd",
        "full_name": "__m128d _mm_cos_pd(__m128d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cos_pd",
        "full_name": "__m256d _mm256_cos_pd(__m256d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cos_pd",
        "full_name": "__m512d _mm512_cos_pd(__m512d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cos_pd",
        "full_name": "__m512d _mm512_mask_cos_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cos_ps",
        "full_name": "__m128 _mm_cos_ps(__m128 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cos_ps",
        "full_name": "__m256 _mm256_cos_ps(__m256 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cos_ps",
        "full_name": "__m512 _mm512_cos_ps(__m512 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cos_ps",
        "full_name": "__m512 _mm512_mask_cos_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_ceil_pd",
        "full_name": "__m256d _mm256_ceil_pd(__m256d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_ceil_pd",
        "full_name": "__m512d _mm512_ceil_pd(__m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_ceil_pd",
        "full_name": "__m512d _mm512_mask_ceil_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_ceil_ps",
        "full_name": "__m256 _mm256_ceil_ps(__m256 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_ceil_ps",
        "full_name": "__m512 _mm512_ceil_ps(__m512 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_ceil_ps",
        "full_name": "__m512 _mm512_mask_ceil_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpord_ps_mask",
        "full_name": "__mmask16 _mm512_cmpord_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpunord_ps_mask",
        "full_name": "__mmask16 _mm512_cmpunord_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpunord_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpunord_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if either is NaN, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmpord_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpord_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" to see if neither is NaN, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_cvtmask16_u32",
        "full_name": "unsigned int _cvtmask16_u32(__mmask16 a);",
        "description": "Convert 16-bit mask \"a\" into an integer value, and store the result in \"dst\"."
    },
    {
        "name": "_cvtmask32_u32",
        "full_name": "unsigned int _cvtmask32_u32(__mmask32 a);",
        "description": "Convert 32-bit mask \"a\" into an integer value, and store the result in \"dst\"."
    },
    {
        "name": "_cvtmask64_u64",
        "full_name": "unsigned __int64 _cvtmask64_u64(__mmask64 a);",
        "description": "Convert 64-bit mask \"a\" into an integer value, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtepu64_pd",
        "full_name": "__m128d _mm_cvtepu64_pd(__m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepu64_pd",
        "full_name": "__m128d _mm_mask_cvtepu64_pd(__m128d src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu64_pd",
        "full_name": "__m128d _mm_maskz_cvtepu64_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu64_pd",
        "full_name": "__m256d _mm256_cvtepu64_pd(__m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepu64_pd",
        "full_name": "__m256d _mm256_mask_cvtepu64_pd(__m256d src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu64_pd",
        "full_name": "__m256d _mm256_maskz_cvtepu64_pd(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu64_pd",
        "full_name": "__m512d _mm512_cvtepu64_pd(__m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_cvtepu32_ps",
        "full_name": "__m512 _mm512_maskz_cvtepu32_ps(__mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepu64_pd",
        "full_name": "__m512d _mm512_mask_cvtepu64_pd(__m512d src, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu64_pd",
        "full_name": "__m512d _mm512_maskz_cvtepu64_pd(__mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepu8_epi32",
        "full_name": "__m128i _mm_mask_cvtepu8_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 4 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu8_epi32",
        "full_name": "__m128i _mm_maskz_cvtepu8_epi32(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 4 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu8_epi32",
        "full_name": "__m256i _mm256_cvtepu8_epi32(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtepu8_epi32",
        "full_name": "__m512i _mm512_cvtepu8_epi32(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepu8_epi64",
        "full_name": "__m128i _mm_mask_cvtepu8_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 2 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu8_epi64",
        "full_name": "__m128i _mm_maskz_cvtepu8_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 2 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu8_epi64",
        "full_name": "__m256i _mm256_cvtepu8_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtepu64_ps",
        "full_name": "__m128 _mm_cvtepu64_ps(__m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepu64_ps",
        "full_name": "__m128 _mm_mask_cvtepu64_ps(__m128 src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepu64_ps",
        "full_name": "__m128 _mm256_mask_cvtepu64_ps(__m128 src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu64_ps",
        "full_name": "__m128 _mm256_cvtepu64_ps(__m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu64_ps",
        "full_name": "__m256 _mm512_mask_cvtepu64_ps(__m256 src, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu64_ps",
        "full_name": "__m128 _mm_maskz_cvtepu64_ps(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu64_ps",
        "full_name": "__m128 _mm256_maskz_cvtepu64_ps(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu64_ps",
        "full_name": "__m256 _mm512_maskz_cvtepu64_ps(__mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepu8_epi16",
        "full_name": "__m128i _mm_mask_cvtepu8_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepu8_epi16",
        "full_name": "__m256i _mm256_mask_cvtepu8_epi16(__m256i src, __mmask16 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu8_epi16",
        "full_name": "__m128i _mm_maskz_cvtepu8_epi16(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu8_epi16",
        "full_name": "__m256i _mm256_maskz_cvtepu8_epi16(__mmask16 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu8_epi16",
        "full_name": "__m256i _mm256_cvtepu8_epi16(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtepu8_epi16",
        "full_name": "__m512i _mm512_cvtepu8_epi16(__m256i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_cvtepu8_epi16",
        "full_name": "__m512i _mm512_maskz_cvtepu8_epi16(__mmask32 k, __m256i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepu8_epi16",
        "full_name": "__m512i _mm512_mask_cvtepu8_epi16(__m512i src, __mmask32 k, __m256i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu32lo_pd",
        "full_name": "__m512d _mm512_cvtepu32lo_pd(__m512i v2);",
        "description": "Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu32lo_pd",
        "full_name": "__m512d _mm512_mask_cvtepu32lo_pd(__m512d src, __mmask8 k, __m512i v2);",
        "description": "Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepu8_epi32",
        "full_name": "__m256i _mm256_mask_cvtepu8_epi32(__m256i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu8_epi32",
        "full_name": "__m256i _mm256_maskz_cvtepu8_epi32(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepu8_epi32",
        "full_name": "__m512i _mm512_mask_cvtepu8_epi32(__m512i src, __mmask16 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu8_epi32",
        "full_name": "__m512i _mm512_maskz_cvtepu8_epi32(__mmask16 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepu8_epi64",
        "full_name": "__m256i _mm256_mask_cvtepu8_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu8_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepu8_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu8_epi64",
        "full_name": "__m512i _mm512_cvtepu8_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu8_epi64",
        "full_name": "__m512i _mm512_mask_cvtepu8_epi64(__m512i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu8_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepu8_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvti32_sd",
        "full_name": "__m128d _mm_cvti32_sd(__m128d a, int b);",
        "description": "Convert the signed 32-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvti32_ss",
        "full_name": "__m128 _mm_cvti32_ss(__m128 a, int b);",
        "description": "Convert the signed 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvti64_sd",
        "full_name": "__m128d _mm_cvti64_sd(__m128d a, __int64 b);",
        "description": "Convert the signed 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvti64_ss",
        "full_name": "__m128 _mm_cvti64_ss(__m128 a, __int64 b);",
        "description": "Convert the signed 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtpd_epu64",
        "full_name": "__m128i _mm_cvtpd_epu64(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtpd_epu64",
        "full_name": "__m128i _mm_mask_cvtpd_epu64(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtpd_epu64",
        "full_name": "__m128i _mm_maskz_cvtpd_epu64(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtpd_epu64",
        "full_name": "__m256i _mm256_cvtpd_epu64(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtpd_epu64",
        "full_name": "__m256i _mm256_mask_cvtpd_epu64(__m256i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtpd_epu64",
        "full_name": "__m256i _mm256_maskz_cvtpd_epu64(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_epu64",
        "full_name": "__m512i _mm512_cvtpd_epu64(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpd_epu64",
        "full_name": "__m512i _mm512_mask_cvtpd_epu64(__m512i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtpd_epu64",
        "full_name": "__m512i _mm512_maskz_cvtpd_epu64(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtpd_ps",
        "full_name": "__m128 _mm_cvtpd_ps(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtpd_ps",
        "full_name": "__m128 _mm_mask_cvtpd_ps(__m128 src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtpd_ps",
        "full_name": "__m128 _mm_maskz_cvtpd_ps(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtpd_ps",
        "full_name": "__m128 _mm256_cvtpd_ps(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtpd_ps",
        "full_name": "__m128 _mm256_mask_cvtpd_ps(__m128 src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtpd_epi64",
        "full_name": "__m128i _mm_cvtpd_epi64(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_si64",
        "full_name": "__int64 _mm_cvtsd_si64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_si64x",
        "full_name": "__int64 _mm_cvtsd_si64x(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtpd_epi64",
        "full_name": "__m128i _mm_mask_cvtpd_epi64(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtpd_epi64",
        "full_name": "__m128i _mm_maskz_cvtpd_epi64(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtpd_epi64",
        "full_name": "__m256i _mm256_cvtpd_epi64(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtpd_epi64",
        "full_name": "__m256i _mm256_mask_cvtpd_epi64(__m256i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtpd_epi64",
        "full_name": "__m256i _mm256_maskz_cvtpd_epi64(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_epi64",
        "full_name": "__m512i _mm512_cvtpd_epi64(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpd_epi64",
        "full_name": "__m512i _mm512_mask_cvtpd_epi64(__m512i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtpd_epi64",
        "full_name": "__m512i _mm512_maskz_cvtpd_epi64(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_kxnor_mask8",
        "full_name": "__mmask8 _kxnor_mask8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise XNOR of 8-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxnor_mask16",
        "full_name": "__mmask16 _kxnor_mask16(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise XNOR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxnor_mask32",
        "full_name": "__mmask32 _kxnor_mask32(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise XNOR of 32-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxnor_mask64",
        "full_name": "__mmask64 _kxnor_mask64(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise XNOR of 64-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm_mask_maddubs_epi16",
        "full_name": "__m128i _mm_mask_maddubs_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_maddubs_epi16",
        "full_name": "__m128i _mm_maskz_maddubs_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maddubs_epi16",
        "full_name": "__m256i _mm256_maddubs_epi16(__m256i a, __m256i b);",
        "description": "Vertically multiply each unsigned 8-bit integer from \"a\" with the corresponding signed 8-bit integer from \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\"."
    },
    {
        "name": "_mm256_mask_maddubs_epi16",
        "full_name": "__m256i _mm256_mask_maddubs_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_maddubs_epi16",
        "full_name": "__m256i _mm256_maskz_maddubs_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maddubs_epi16",
        "full_name": "__m512i _mm512_maddubs_epi16(__m512i a, __m512i b);",
        "description": "Vertically multiply each unsigned 8-bit integer from \"a\" with the corresponding signed 8-bit integer from \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\"."
    },
    {
        "name": "_mm512_mask_maddubs_epi16",
        "full_name": "__m512i _mm512_mask_maddubs_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_maddubs_epi16",
        "full_name": "__m512i _mm512_maskz_maddubs_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply packed unsigned 8-bit integers in \"a\" by packed signed 8-bit integers in \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_max_pd",
        "full_name": "__m128d _mm_mask_max_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm_maskz_max_pd",
        "full_name": "__m128d _mm_maskz_max_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_mask_max_pd",
        "full_name": "__m256d _mm256_mask_max_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_maskz_max_pd",
        "full_name": "__m256d _mm256_maskz_max_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_max_pd",
        "full_name": "__m256d _mm256_max_pd(__m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". [max_float_note]"
    },
    {
        "name": "_mm512_mask_max_pd",
        "full_name": "__m512d _mm512_mask_max_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm512_maskz_max_pd",
        "full_name": "__m512d _mm512_maskz_max_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm512_max_pd",
        "full_name": "__m512d _mm512_max_pd(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". [max_float_note]"
    },
    {
        "name": "_mm_mask_max_ps",
        "full_name": "__m128 _mm_mask_max_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm_maskz_max_ps",
        "full_name": "__m128 _mm_maskz_max_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_mask_max_ps",
        "full_name": "__m256 _mm256_mask_max_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_maskz_max_ps",
        "full_name": "__m256 _mm256_maskz_max_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm256_max_ps",
        "full_name": "__m256 _mm256_max_ps(__m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". [max_float_note]"
    },
    {
        "name": "_mm512_mask_max_ps",
        "full_name": "__m512 _mm512_mask_max_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm512_maskz_max_ps",
        "full_name": "__m512 _mm512_maskz_max_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]"
    },
    {
        "name": "_mm512_max_ps",
        "full_name": "__m512 _mm512_max_ps(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\". [max_float_note]"
    },
    {
        "name": "_mm_mask_max_sd",
        "full_name": "__m128d _mm_mask_max_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_max_sd",
        "full_name": "__m128d _mm_maskz_max_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_max_ss",
        "full_name": "__m128 _mm_mask_max_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_max_ss",
        "full_name": "__m128 _mm_maskz_max_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_min_pd",
        "full_name": "__m128d _mm_mask_min_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm_maskz_min_pd",
        "full_name": "__m128d _mm_maskz_min_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_mask_min_pd",
        "full_name": "__m256d _mm256_mask_min_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_maskz_min_pd",
        "full_name": "__m256d _mm256_maskz_min_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_min_pd",
        "full_name": "__m256d _mm256_min_pd(__m256d a, __m256d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". [min_float_note]"
    },
    {
        "name": "_mm512_mask_min_pd",
        "full_name": "__m512d _mm512_mask_min_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm512_maskz_min_pd",
        "full_name": "__m512d _mm512_maskz_min_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm512_min_pd",
        "full_name": "__m512d _mm512_min_pd(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". [min_float_note]"
    },
    {
        "name": "_mm_mask_min_ps",
        "full_name": "__m128 _mm_mask_min_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm_maskz_min_ps",
        "full_name": "__m128 _mm_maskz_min_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_mask_min_ps",
        "full_name": "__m256 _mm256_mask_min_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_maskz_min_ps",
        "full_name": "__m256 _mm256_maskz_min_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm256_min_ps",
        "full_name": "__m256 _mm256_min_ps(__m256 a, __m256 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". [min_float_note]"
    },
    {
        "name": "_mm512_mask_min_ps",
        "full_name": "__m512 _mm512_mask_min_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm512_maskz_min_ps",
        "full_name": "__m512 _mm512_maskz_min_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]"
    },
    {
        "name": "_mm512_min_ps",
        "full_name": "__m512 _mm512_min_ps(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\". [min_float_note]"
    },
    {
        "name": "_mm_mask_min_sd",
        "full_name": "__m128d _mm_mask_min_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_min_sd",
        "full_name": "__m128d _mm_maskz_min_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_min_ss",
        "full_name": "__m128 _mm_mask_min_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_min_ss",
        "full_name": "__m128 _mm_maskz_min_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm512_nearbyint_pd",
        "full_name": "__m512d _mm512_nearbyint_pd(__m512d a);",
        "description": "Rounds each packed double-precision (64-bit) floating-point element in \"a\" to the nearest integer value and stores the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_nearbyint_pd",
        "full_name": "__m512d _mm512_mask_nearbyint_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Rounds each packed double-precision (64-bit) floating-point element in \"a\" to the nearest integer value and stores the results as packed double-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_nearbyint_ps",
        "full_name": "__m512 _mm512_nearbyint_ps(__m512 a);",
        "description": "Rounds each packed single-precision (32-bit) floating-point element in \"a\" to the nearest integer value and stores the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_nearbyint_ps",
        "full_name": "__m512 _mm512_mask_nearbyint_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Rounds each packed single-precision (32-bit) floating-point element in \"a\" to the nearest integer value and stores the results as packed single-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_irem_epi32",
        "full_name": "__m128i _mm_irem_epi32(__m128i a, __m128i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_irem_epi32",
        "full_name": "__m256i _mm256_irem_epi32(__m256i a, __m256i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm_div_round_sd",
        "full_name": "__m128d _mm_div_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mask_div_round_sd",
        "full_name": "__m128d _mm_mask_div_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_div_round_sd",
        "full_name": "__m128d _mm_maskz_div_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Divide the lower double-precision (64-bit) floating-point element in \"a\" by the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_div_round_ss",
        "full_name": "__m128 _mm_div_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mask_div_round_ss",
        "full_name": "__m128 _mm_mask_div_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_div_round_ss",
        "full_name": "__m128 _mm_maskz_div_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Divide the lower single-precision (32-bit) floating-point element in \"a\" by the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_cvtepi32_epi8",
        "full_name": "__m128i _mm_cvtepi32_epi8(__m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi32_epi8",
        "full_name": "__m128i _mm_mask_cvtepi32_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi32_epi8",
        "full_name": "__m128i _mm_maskz_cvtepi32_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi32_epi8",
        "full_name": "__m128i _mm256_cvtepi32_epi8(__m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi32_epi8",
        "full_name": "__m128i _mm256_mask_cvtepi32_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi32_epi8",
        "full_name": "__m128i _mm256_maskz_cvtepi32_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi32_epi8",
        "full_name": "__m128i _mm512_cvtepi32_epi8(__m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi32_epi8",
        "full_name": "__m128i _mm512_mask_cvtepi32_epi8(__m128i src, __mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi32_epi8",
        "full_name": "__m128i _mm512_maskz_cvtepi32_epi8(__mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi64_epi16",
        "full_name": "__m128i _mm_cvtepi64_epi16(__m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi64_epi16",
        "full_name": "__m128i _mm_mask_cvtepi64_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi64_epi16",
        "full_name": "__m128i _mm_maskz_cvtepi64_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi64_epi16",
        "full_name": "__m128i _mm256_cvtepi64_epi16(__m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_epi16",
        "full_name": "__m128i _mm256_mask_cvtepi64_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi64_epi16",
        "full_name": "__m128i _mm256_maskz_cvtepi64_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi64_epi16",
        "full_name": "__m128i _mm512_cvtepi64_epi16(__m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_epi16",
        "full_name": "__m128i _mm512_mask_cvtepi64_epi16(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi64_epi16",
        "full_name": "__m128i _mm512_maskz_cvtepi64_epi16(__mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi64_epi32",
        "full_name": "__m128i _mm_cvtepi64_epi32(__m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi64_epi32",
        "full_name": "__m128i _mm_mask_cvtepi64_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi64_epi32",
        "full_name": "__m128i _mm_maskz_cvtepi64_epi32(__mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi64_epi32",
        "full_name": "__m128i _mm256_cvtepi64_epi32(__m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_epi32",
        "full_name": "__m128i _mm256_mask_cvtepi64_epi32(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi64_epi32",
        "full_name": "__m128i _mm256_maskz_cvtepi64_epi32(__mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi64_epi32",
        "full_name": "__m256i _mm512_cvtepi64_epi32(__m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_epi32",
        "full_name": "__m256i _mm512_mask_cvtepi64_epi32(__m256i src, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi64_epi32",
        "full_name": "__m256i _mm512_maskz_cvtepi64_epi32(__mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi32_pd",
        "full_name": "__m128d _mm_cvtepi32_pd(__m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi32_pd",
        "full_name": "__m128d _mm_mask_cvtepi32_pd(__m128d src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi32_pd",
        "full_name": "__m128d _mm_maskz_cvtepi32_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepi32_pd",
        "full_name": "__m256d _mm256_mask_cvtepi32_pd(__m256d src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi32_pd",
        "full_name": "__m256d _mm256_maskz_cvtepi32_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepi32_pd",
        "full_name": "__m512d _mm512_mask_cvtepi32_pd(__m512d src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi32_pd",
        "full_name": "__m512d _mm512_maskz_cvtepi32_pd(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi32_ps",
        "full_name": "__m128 _mm_cvtepi32_ps(__m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi32_ps",
        "full_name": "__m128 _mm_mask_cvtepi32_ps(__m128 src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi32_ps",
        "full_name": "__m128 _mm_maskz_cvtepi32_ps(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepi32_ps",
        "full_name": "__m256 _mm256_mask_cvtepi32_ps(__m256 src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi32_ps",
        "full_name": "__m256 _mm256_maskz_cvtepi32_ps(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepi32_ps",
        "full_name": "__m512 _mm512_mask_cvtepi32_ps(__m512 src, __mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi32_ps",
        "full_name": "__m512 _mm512_maskz_cvtepi32_ps(__mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvt_roundps_epi32",
        "full_name": "__m512i _mm512_cvt_roundps_epi32(__m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_epi32",
        "full_name": "__m512i _mm512_mask_cvt_roundps_epi32(__m512i src, __mmask16 k, __m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_epi32",
        "full_name": "__m512i _mm512_maskz_cvt_roundps_epi32(__mmask16 k, __m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundps_epu32",
        "full_name": "__m512i _mm512_cvt_roundps_epu32(__m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_epu32",
        "full_name": "__m512i _mm512_mask_cvt_roundps_epu32(__m512i src, __mmask16 k, __m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_epu32",
        "full_name": "__m512i _mm512_maskz_cvt_roundps_epu32(__mmask16 k, __m512 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundpd_epi64",
        "full_name": "__m512i _mm512_cvt_roundpd_epi64(__m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundpd_epi64",
        "full_name": "__m512i _mm512_mask_cvt_roundpd_epi64(__m512i src, __mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundpd_epi64",
        "full_name": "__m512i _mm512_maskz_cvt_roundpd_epi64(__mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundpd_epu64",
        "full_name": "__m512i _mm512_cvt_roundpd_epu64(__m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundpd_epu64",
        "full_name": "__m512i _mm512_mask_cvt_roundpd_epu64(__m512i src, __mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundpd_epu64",
        "full_name": "__m512i _mm512_maskz_cvt_roundpd_epu64(__mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm_cvtepi64_epi8",
        "full_name": "__m128i _mm_cvtepi64_epi8(__m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi64_epi8",
        "full_name": "__m128i _mm_mask_cvtepi64_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi64_epi8",
        "full_name": "__m128i _mm_maskz_cvtepi64_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi64_epi8",
        "full_name": "__m128i _mm256_cvtepi64_epi8(__m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_epi8",
        "full_name": "__m128i _mm256_mask_cvtepi64_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi64_epi8",
        "full_name": "__m128i _mm256_maskz_cvtepi64_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi64_epi8",
        "full_name": "__m128i _mm512_cvtepi64_epi8(__m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_epi8",
        "full_name": "__m128i _mm512_mask_cvtepi64_epi8(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi64_epi8",
        "full_name": "__m128i _mm512_maskz_cvtepi64_epi8(__mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi64_pd",
        "full_name": "__m128d _mm_cvtepi64_pd(__m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi64_pd",
        "full_name": "__m128d _mm_mask_cvtepi64_pd(__m128d src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi64_pd",
        "full_name": "__m128d _mm_maskz_cvtepi64_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi64_pd",
        "full_name": "__m256d _mm256_cvtepi64_pd(__m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_pd",
        "full_name": "__m256d _mm256_mask_cvtepi64_pd(__m256d src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi64_pd",
        "full_name": "__m256d _mm256_maskz_cvtepi64_pd(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi64_pd",
        "full_name": "__m512d _mm512_cvtepi64_pd(__m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_hsub_pi16",
        "full_name": "__m64 _mm_hsub_pi16(__m64 a, __m64 b);",
        "description": "Horizontally subtract adjacent pairs of 16-bit integers in \"a\" and \"b\", and pack the signed 16-bit results in \"dst\"."
    },
    {
        "name": "_mm_hsub_pi32",
        "full_name": "__m64 _mm_hsub_pi32(__m64 a, __m64 b);",
        "description": "Horizontally subtract adjacent pairs of 32-bit integers in \"a\" and \"b\", and pack the signed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm512_cvtepi32lo_pd",
        "full_name": "__m512d _mm512_cvtepi32lo_pd(__m512i v2);",
        "description": "Performs element-by-element conversion of the lower half of packed 32-bit integer elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi32lo_pd",
        "full_name": "__m512d _mm512_mask_cvtepi32lo_pd(__m512d src, __mmask8 k, __m512i v2);",
        "description": "Performs element-by-element conversion of the lower half of packed 32-bit integer elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_insertf128_si256",
        "full_name": "__m256i _mm256_insertf128_si256(__m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_insertf32x4",
        "full_name": "__m256 _mm256_insertf32x4(__m256 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_mask_insertf32x4",
        "full_name": "__m256 _mm256_mask_insertf32x4(__m256 src, __mmask8 k, __m256 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_insertf32x4",
        "full_name": "__m256 _mm256_maskz_insertf32x4(__mmask8 k, __m256 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_insertf32x4",
        "full_name": "__m512 _mm512_insertf32x4(__m512 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_mask_insertf32x4",
        "full_name": "__m512 _mm512_mask_insertf32x4(__m512 src, __mmask16 k, __m512 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_insertf32x4",
        "full_name": "__m512 _mm512_maskz_insertf32x4(__mmask16 k, __m512 a, __m128 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_insertf32x8",
        "full_name": "__m512 _mm512_mask_insertf32x8(__m512 src, __mmask16 k, __m512 a, __m256 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_insertf32x8",
        "full_name": "__m512 _mm512_maskz_insertf32x8(__mmask16 k, __m512 a, __m256 b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_insertf64x2",
        "full_name": "__m256d _mm256_insertf64x2(__m256d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_mask_insertf64x2",
        "full_name": "__m256d _mm256_mask_insertf64x2(__m256d src, __mmask8 k, __m256d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_insertf64x2",
        "full_name": "__m256d _mm256_maskz_insertf64x2(__mmask8 k, __m256d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_insertf64x2",
        "full_name": "__m512d _mm512_insertf64x2(__m512d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_mask_insertf64x2",
        "full_name": "__m512d _mm512_mask_insertf64x2(__m512d src, __mmask8 k, __m512d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_insertf64x2",
        "full_name": "__m512d _mm512_maskz_insertf64x2(__mmask8 k, __m512d a, __m128d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_insertf64x4",
        "full_name": "__m512d _mm512_mask_insertf64x4(__m512d src, __mmask8 k, __m512d a, __m256d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_extracti32x4_epi32",
        "full_name": "__m128i _mm256_mask_extracti32x4_epi32(__m128i src, __mmask8 k, __m256i a, int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_extracti32x4_epi32",
        "full_name": "__m128i _mm256_maskz_extracti32x4_epi32(__mmask8 k, __m256i a, int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_extracti32x4_epi32",
        "full_name": "__m128i _mm512_mask_extracti32x4_epi32(__m128i src, __mmask8 k, __m512i a, int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extracti32x4_epi32",
        "full_name": "__m128i _mm512_maskz_extracti32x4_epi32(__mmask8 k, __m512i a, int imm8);",
        "description": "Extract 128 bits (composed of 4 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_extracti32x8_epi32",
        "full_name": "__m256i _mm512_extracti32x8_epi32(__m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 8 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_extracti32x8_epi32",
        "full_name": "__m256i _mm512_mask_extracti32x8_epi32(__m256i src, __mmask8 k, __m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 8 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extracti32x8_epi32",
        "full_name": "__m256i _mm512_maskz_extracti32x8_epi32(__mmask8 k, __m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 8 packed 32-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_extracti64x2_epi64",
        "full_name": "__m128i _mm256_extracti64x2_epi64(__m256i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_mask_extracti64x2_epi64",
        "full_name": "__m128i _mm256_mask_extracti64x2_epi64(__m128i src, __mmask8 k, __m256i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_extracti64x2_epi64",
        "full_name": "__m128i _mm256_maskz_extracti64x2_epi64(__mmask8 k, __m256i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_extracti64x2_epi64",
        "full_name": "__m128i _mm512_extracti64x2_epi64(__m512i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_extracti64x2_epi64",
        "full_name": "__m128i _mm512_mask_extracti64x2_epi64(__m128i src, __mmask8 k, __m512i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extracti64x2_epi64",
        "full_name": "__m128i _mm512_maskz_extracti64x2_epi64(__mmask8 k, __m512i a, int imm8);",
        "description": "Extract 128 bits (composed of 2 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_extracti64x4_epi64",
        "full_name": "__m256i _mm512_extracti64x4_epi64(__m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 4 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_mask_extracti64x4_epi64",
        "full_name": "__m256i _mm512_mask_extracti64x4_epi64(__m256i src, __mmask8 k, __m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 4 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_extracti64x4_epi64",
        "full_name": "__m256i _mm512_maskz_extracti64x4_epi64(__mmask8 k, __m512i a, int imm8);",
        "description": "Extract 256 bits (composed of 4 packed 64-bit integers) from \"a\", selected with \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttpd_epi64",
        "full_name": "__m128i _mm_cvttpd_epi64(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttpd_epi64",
        "full_name": "__m128i _mm_mask_cvttpd_epi64(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttpd_epi64",
        "full_name": "__m128i _mm_maskz_cvttpd_epi64(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttpd_epi64",
        "full_name": "__m256i _mm256_cvttpd_epi64(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttpd_epi64",
        "full_name": "__m256i _mm256_mask_cvttpd_epi64(__m256i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttpd_epi64",
        "full_name": "__m256i _mm256_maskz_cvttpd_epi64(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttpd_epi64",
        "full_name": "__m512i _mm512_cvttpd_epi64(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttpd_epi64",
        "full_name": "__m512i _mm512_mask_cvttpd_epi64(__m512i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttpd_epi64",
        "full_name": "__m512i _mm512_maskz_cvttpd_epi64(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttpd_epu64",
        "full_name": "__m128i _mm_cvttpd_epu64(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttpd_epu64",
        "full_name": "__m128i _mm_mask_cvttpd_epu64(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttpd_epu64",
        "full_name": "__m128i _mm_maskz_cvttpd_epu64(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttpd_epu64",
        "full_name": "__m256i _mm256_cvttpd_epu64(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttpd_epu64",
        "full_name": "__m256i _mm256_mask_cvttpd_epu64(__m256i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttpd_epu64",
        "full_name": "__m256i _mm256_maskz_cvttpd_epu64(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttpd_epu64",
        "full_name": "__m512i _mm512_cvttpd_epu64(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttpd_epu64",
        "full_name": "__m512i _mm512_mask_cvttpd_epu64(__m512i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttpd_epu64",
        "full_name": "__m512i _mm512_maskz_cvttpd_epu64(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvttps_epi32",
        "full_name": "__m128i _mm_mask_cvttps_epi32(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttps_epi32",
        "full_name": "__m128i _mm_maskz_cvttps_epi32(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttps_epi32",
        "full_name": "__m256i _mm256_cvttps_epi32(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttps_epi32",
        "full_name": "__m256i _mm256_mask_cvttps_epi32(__m256i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttps_epi32",
        "full_name": "__m256i _mm256_maskz_cvttps_epi32(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttps_epi32",
        "full_name": "__m512i _mm512_cvttps_epi32(__m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttps_epi32",
        "full_name": "__m512i _mm512_mask_cvttps_epi32(__m512i src, __mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttps_epi32",
        "full_name": "__m512i _mm512_maskz_cvttps_epi32(__mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttps_epu32",
        "full_name": "__m128i _mm_cvttps_epu32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttps_epu32",
        "full_name": "__m128i _mm_mask_cvttps_epu32(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttps_epu32",
        "full_name": "__m128i _mm_maskz_cvttps_epu32(__mmask8 k, __m128 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttps_epu32",
        "full_name": "__m256i _mm256_cvttps_epu32(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttps_epu32",
        "full_name": "__m256i _mm256_mask_cvttps_epu32(__m256i src, __mmask8 k, __m256 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttps_epu32",
        "full_name": "__m256i _mm256_maskz_cvttps_epu32(__mmask8 k, __m256 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttps_epu32",
        "full_name": "__m512i _mm512_cvttps_epu32(__m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttps_epu32",
        "full_name": "__m512i _mm512_mask_cvttps_epu32(__m512i src, __mmask16 k, __m512 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttps_epu32",
        "full_name": "__m512i _mm512_maskz_cvttps_epu32(__mmask16 k, __m512 a);",
        "description": "Convert packed double-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvttpd_epi32",
        "full_name": "__m128i _mm_mask_cvttpd_epi32(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttpd_epi32",
        "full_name": "__m128i _mm_maskz_cvttpd_epi32(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttpd_epi32",
        "full_name": "__m128i _mm256_cvttpd_epi32(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttpd_epi32",
        "full_name": "__m128i _mm256_mask_cvttpd_epi32(__m128i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttpd_epi32",
        "full_name": "__m128i _mm256_maskz_cvttpd_epi32(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttpd_epi32",
        "full_name": "__m256i _mm512_cvttpd_epi32(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttpd_epi32",
        "full_name": "__m256i _mm512_mask_cvttpd_epi32(__m256i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttpd_epi32",
        "full_name": "__m256i _mm512_maskz_cvttpd_epi32(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttpd_epu32",
        "full_name": "__m128i _mm_cvttpd_epu32(__m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttpd_epu32",
        "full_name": "__m128i _mm_mask_cvttpd_epu32(__m128i src, __mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttpd_epu32",
        "full_name": "__m128i _mm_maskz_cvttpd_epu32(__mmask8 k, __m128d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttpd_epu32",
        "full_name": "__m128i _mm256_cvttpd_epu32(__m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttpd_epu32",
        "full_name": "__m128i _mm256_mask_cvttpd_epu32(__m128i src, __mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttpd_epu32",
        "full_name": "__m128i _mm256_maskz_cvttpd_epu32(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttpd_epu32",
        "full_name": "__m256i _mm512_cvttpd_epu32(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttpd_epu32",
        "full_name": "__m256i _mm512_mask_cvttpd_epu32(__m256i src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttpd_epu32",
        "full_name": "__m256i _mm512_maskz_cvttpd_epu32(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttps_epi64",
        "full_name": "__m128i _mm_cvttps_epi64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttps_epi64",
        "full_name": "__m128i _mm_mask_cvttps_epi64(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttps_epi64",
        "full_name": "__m128i _mm_maskz_cvttps_epi64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttps_epi64",
        "full_name": "__m256i _mm256_cvttps_epi64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttps_epi64",
        "full_name": "__m256i _mm256_mask_cvttps_epi64(__m256i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttps_epi64",
        "full_name": "__m256i _mm256_maskz_cvttps_epi64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttps_epi64",
        "full_name": "__m512i _mm512_cvttps_epi64(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttps_epi64",
        "full_name": "__m512i _mm512_mask_cvttps_epi64(__m512i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttps_epi64",
        "full_name": "__m512i _mm512_maskz_cvttps_epi64(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvttps_epu64",
        "full_name": "__m128i _mm_cvttps_epu64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvttps_epu64",
        "full_name": "__m128i _mm_mask_cvttps_epu64(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvttps_epu64",
        "full_name": "__m128i _mm_maskz_cvttps_epu64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvttps_epu64",
        "full_name": "__m256i _mm256_cvttps_epu64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvttps_epu64",
        "full_name": "__m256i _mm256_mask_cvttps_epu64(__m256i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvttps_epu64",
        "full_name": "__m256i _mm256_maskz_cvttps_epu64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvttps_epu64",
        "full_name": "__m512i _mm512_cvttps_epu64(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvttps_epu64",
        "full_name": "__m512i _mm512_mask_cvttps_epu64(__m512i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvttps_epu64",
        "full_name": "__m512i _mm512_maskz_cvttps_epu64(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvt_roundsd_i64",
        "full_name": "__int64 _mm_cvt_roundsd_i64(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_si64",
        "full_name": "__int64 _mm_cvt_roundsd_si64(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_u64",
        "full_name": "unsigned __int64 _mm_cvt_roundsd_u64(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_i32",
        "full_name": "int _mm_cvt_roundss_i32(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_si32",
        "full_name": "int _mm_cvt_roundss_si32(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_u32",
        "full_name": "unsigned int _mm_cvt_roundss_u32(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_sd",
        "full_name": "__m128d _mm_cvt_roundss_sd(__m128d a, __m128 b, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \n\t[sae_note]"
    },
    {
        "name": "_mm_mask_cvt_roundss_sd",
        "full_name": "__m128d _mm_mask_cvt_roundss_sd(__m128d src, __mmask8 k, __m128d a, __m128 b, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_maskz_cvt_roundss_sd",
        "full_name": "__m128d _mm_maskz_cvt_roundss_sd(__mmask8 k, __m128d a, __m128 b, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \n\t[sae_note]"
    },
    {
        "name": "_mm512_cvt_roundps_pd",
        "full_name": "__m512d _mm512_cvt_roundps_pd(__m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_pd",
        "full_name": "__m512d _mm512_mask_cvt_roundps_pd(__m512d src, __mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_pd",
        "full_name": "__m512d _mm512_maskz_cvt_roundps_pd(__mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_i32",
        "full_name": "int _mm_cvtt_roundsd_i32(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_i64",
        "full_name": "__int64 _mm_cvtt_roundsd_i64(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_si32",
        "full_name": "int _mm_cvtt_roundsd_si32(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_si64",
        "full_name": "__int64 _mm_cvtt_roundsd_si64(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_u32",
        "full_name": "unsigned int _mm_cvtt_roundsd_u32(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundsd_u64",
        "full_name": "unsigned __int64 _mm_cvtt_roundsd_u64(__m128d a, int sae);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_i32",
        "full_name": "int _mm_cvtt_roundss_i32(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_i64",
        "full_name": "__int64 _mm_cvtt_roundss_i64(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_si32",
        "full_name": "int _mm_cvtt_roundss_si32(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_si64",
        "full_name": "__int64 _mm_cvtt_roundss_si64(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_u32",
        "full_name": "unsigned int _mm_cvtt_roundss_u32(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 32-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm_cvtt_roundss_u64",
        "full_name": "unsigned __int64 _mm_cvtt_roundss_u64(__m128 a, int sae);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 64-bit integer with truncation, and store the result in \"dst\".\n\t[sae_note]"
    },
    {
        "name": "_mm512_cvt_roundpd_epi32",
        "full_name": "__m256i _mm512_cvt_roundpd_epi32(__m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundpd_epi32",
        "full_name": "__m256i _mm512_mask_cvt_roundpd_epi32(__m256i src, __mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundpd_epi32",
        "full_name": "__m256i _mm512_maskz_cvt_roundpd_epi32(__mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundpd_epu32",
        "full_name": "__m256i _mm512_cvt_roundpd_epu32(__m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundpd_epu32",
        "full_name": "__m256i _mm512_mask_cvt_roundpd_epu32(__m256i src, __mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundpd_epu32",
        "full_name": "__m256i _mm512_maskz_cvt_roundpd_epu32(__mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundpd_ps",
        "full_name": "__m256 _mm512_cvt_roundpd_ps(__m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundpd_ps",
        "full_name": "__m256 _mm512_mask_cvt_roundpd_ps(__m256 src, __mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundpd_ps",
        "full_name": "__m256 _mm512_maskz_cvt_roundpd_ps(__mmask8 k, __m512d a, int rounding);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_i32",
        "full_name": "int _mm_cvt_roundsd_i32(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_si32",
        "full_name": "int _mm_cvt_roundsd_si32(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_ss",
        "full_name": "__m128 _mm_cvt_roundsd_ss(__m128 a, __m128d b, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_cvt_roundsd_ss",
        "full_name": "__m128 _mm_mask_cvt_roundsd_ss(__m128 src, __mmask8 k, __m128 a, __m128d b, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_cvt_roundsd_ss",
        "full_name": "__m128 _mm_maskz_cvt_roundsd_ss(__mmask8 k, __m128 a, __m128d b, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsd_u32",
        "full_name": "unsigned int _mm_cvt_roundsd_u32(__m128d a, int rounding);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 32-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsi32_ss",
        "full_name": "__m128 _mm_cvt_roundsi32_ss(__m128 a, int b, int rounding);",
        "description": "Convert the signed 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsi64_sd",
        "full_name": "__m128d _mm_cvt_roundsi64_sd(__m128d a, __int64 b, int rounding);",
        "description": "Convert the signed 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundsi64_ss",
        "full_name": "__m128 _mm_cvt_roundsi64_ss(__m128 a, __int64 b, int rounding);",
        "description": "Convert the signed 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_i64",
        "full_name": "__int64 _mm_cvt_roundss_i64(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_si64",
        "full_name": "__int64 _mm_cvt_roundss_si64(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundss_u64",
        "full_name": "unsigned __int64 _mm_cvt_roundss_u64(__m128 a, int rounding);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 64-bit integer, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundu32_ss",
        "full_name": "__m128 _mm_cvt_roundu32_ss(__m128 a, unsigned int b, int rounding);",
        "description": "Convert the unsigned 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundu64_sd",
        "full_name": "__m128d _mm_cvt_roundu64_sd(__m128d a, unsigned __int64 b, int rounding);",
        "description": "Convert the unsigned 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_cvt_roundu64_ss",
        "full_name": "__m128 _mm_cvt_roundu64_ss(__m128 a, unsigned __int64 b, int rounding);",
        "description": "Convert the unsigned 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundps_epi64",
        "full_name": "__m512i _mm512_cvt_roundps_epi64(__m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_epi64",
        "full_name": "__m512i _mm512_mask_cvt_roundps_epi64(__m512i src, __mmask8 k, __m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_epi64",
        "full_name": "__m512i _mm512_maskz_cvt_roundps_epi64(__mmask8 k, __m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundps_epu64",
        "full_name": "__m512i _mm512_cvt_roundps_epu64(__m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_epu64",
        "full_name": "__m512i _mm512_mask_cvt_roundps_epu64(__m512i src, __mmask8 k, __m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_epu64",
        "full_name": "__m512i _mm512_maskz_cvt_roundps_epu64(__mmask8 k, __m256 a, int rounding);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_cvt_roundph_ps",
        "full_name": "__m512 _mm512_cvt_roundph_ps(__m256i a, int sae);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundph_ps",
        "full_name": "__m512 _mm512_mask_cvt_roundph_ps(__m512 src, __mmask16 k, __m256i a, int sae);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundph_ps",
        "full_name": "__m512 _mm512_maskz_cvt_roundph_ps(__mmask16 k, __m256i a, int sae);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm_mask_cvt_roundps_ph",
        "full_name": "__m128i _mm_mask_cvt_roundps_ph(__m128i src, __mmask8 k, __m128 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_cvt_roundps_ph",
        "full_name": "__m128i _mm_maskz_cvt_roundps_ph(__mmask8 k, __m128 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_mask_cvt_roundps_ph",
        "full_name": "__m128i _mm256_mask_cvt_roundps_ph(__m128i src, __mmask8 k, __m256 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_cvt_roundps_ph",
        "full_name": "__m128i _mm256_maskz_cvt_roundps_ph(__mmask8 k, __m256 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_cvt_roundps_ph",
        "full_name": "__m256i _mm512_cvt_roundps_ph(__m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\". [round2_note]"
    },
    {
        "name": "_mm512_mask_cvt_roundps_ph",
        "full_name": "__m256i _mm512_mask_cvt_roundps_ph(__m256i src, __mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round2_note]"
    },
    {
        "name": "_mm512_maskz_cvt_roundps_ph",
        "full_name": "__m256i _mm512_maskz_cvt_roundps_ph(__mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round2_note]"
    },
    {
        "name": "_mm_mask_cvtepi16_storeu_epi8",
        "full_name": "void _mm_mask_cvtepi16_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi16_storeu_epi8",
        "full_name": "void _mm256_mask_cvtepi16_storeu_epi8(void *base_addr, __mmask16 k, __m256i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi16_storeu_epi8",
        "full_name": "void _mm512_mask_cvtepi16_storeu_epi8(void *base_addr, __mmask32 k, __m512i a);",
        "description": "Convert packed 16-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtepi32_storeu_epi16",
        "full_name": "void _mm_mask_cvtepi32_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi32_storeu_epi16",
        "full_name": "void _mm256_mask_cvtepi32_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi32_storeu_epi16",
        "full_name": "void _mm512_mask_cvtepi32_storeu_epi16(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtepi32_storeu_epi8",
        "full_name": "void _mm_mask_cvtepi32_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi32_storeu_epi8",
        "full_name": "void _mm256_mask_cvtepi32_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi32_storeu_epi8",
        "full_name": "void _mm512_mask_cvtepi32_storeu_epi8(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed 32-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_reduce_max_epi32",
        "full_name": "int _mm512_mask_reduce_max_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed signed 32-bit integers in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_max_epu32",
        "full_name": "unsigned int _mm512_mask_reduce_max_epu32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed unsigned 32-bit integers in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_min_epi32",
        "full_name": "int _mm512_mask_reduce_min_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed signed 32-bit integers in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_min_epu32",
        "full_name": "unsigned int _mm512_mask_reduce_min_epu32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed unsigned 32-bit integers in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_max_pd",
        "full_name": "double _mm512_mask_reduce_max_pd(__mmask8 k, __m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_pd",
        "full_name": "double _mm512_reduce_min_pd(__m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by minimum. Returns the minimum of all elements in \"a\". [min_float_note]"
    },
    {
        "name": "_mm512_mask_reduce_min_pd",
        "full_name": "double _mm512_mask_reduce_min_pd(__mmask8 k, __m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\". [min_float_note]"
    },
    {
        "name": "_mm512_reduce_max_epi64",
        "full_name": "__int64 _mm512_reduce_max_epi64(__m512i a);",
        "description": "Reduce the packed signed 64-bit integers in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_max_epi64",
        "full_name": "__int64 _mm512_mask_reduce_max_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed signed 64-bit integers in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_max_epu64",
        "full_name": "unsigned __int64 _mm512_reduce_max_epu64(__m512i a);",
        "description": "Reduce the packed unsigned 64-bit integers in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_max_epu64",
        "full_name": "unsigned __int64 _mm512_mask_reduce_max_epu64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed unsigned 64-bit integers in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_epi64",
        "full_name": "__int64 _mm512_reduce_min_epi64(__m512i a);",
        "description": "Reduce the packed signed 64-bit integers in \"a\" by minimum. Returns the minimum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_min_epi64",
        "full_name": "__int64 _mm512_mask_reduce_min_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed signed 64-bit integers in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_epu64",
        "full_name": "unsigned __int64 _mm512_reduce_min_epu64(__m512i a);",
        "description": "Reduce the packed unsigned 64-bit integers in \"a\" by minimum. Returns the minimum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_min_epu64",
        "full_name": "unsigned __int64 _mm512_mask_reduce_min_epu64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed unsigned 64-bit integers in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_max_ps",
        "full_name": "float _mm512_reduce_max_ps(__m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by maximum. Returns the maximum of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_max_ps",
        "full_name": "float _mm512_mask_reduce_max_ps(__mmask16 k, __m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by maximum using mask \"k\". Returns the maximum of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_min_ps",
        "full_name": "float _mm512_reduce_min_ps(__m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by minimum. Returns the minimum of all elements in \"a\". [min_float_note]"
    },
    {
        "name": "_mm512_mask_reduce_min_ps",
        "full_name": "float _mm512_mask_reduce_min_ps(__mmask16 k, __m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by minimum using mask \"k\". Returns the minimum of all active elements in \"a\". [min_float_note]"
    },
    {
        "name": "_mm_rem_epu16",
        "full_name": "__m128i _mm_rem_epu16(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epu16",
        "full_name": "__m256i _mm256_rem_epu16(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epu16",
        "full_name": "__m512i _mm512_rem_epu16(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm_rem_epi16",
        "full_name": "__m128i _mm_rem_epi16(__m128i a, __m128i b);",
        "description": "Divide packed 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epi16",
        "full_name": "__m256i _mm256_rem_epi16(__m256i a, __m256i b);",
        "description": "Divide packed 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epi16",
        "full_name": "__m512i _mm512_rem_epi16(__m512i a, __m512i b);",
        "description": "Divide packed 16-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 16-bit integers in \"dst\"."
    },
    {
        "name": "_mm_rem_epi32",
        "full_name": "__m128i _mm_rem_epi32(__m128i a, __m128i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epi32",
        "full_name": "__m256i _mm256_rem_epi32(__m256i a, __m256i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epi32",
        "full_name": "__m512i _mm512_rem_epi32(__m512i a, __m512i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_mask_rem_epi32",
        "full_name": "__m512i _mm512_mask_rem_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 32-bit integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rem_epu32",
        "full_name": "__m128i _mm_rem_epu32(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epu32",
        "full_name": "__m256i _mm256_rem_epu32(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epu32",
        "full_name": "__m512i _mm512_rem_epu32(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_mask_rem_epu32",
        "full_name": "__m512i _mm512_mask_rem_epu32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rem_epi64",
        "full_name": "__m128i _mm_rem_epi64(__m128i a, __m128i b);",
        "description": "Divide packed 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epi64",
        "full_name": "__m256i _mm256_rem_epi64(__m256i a, __m256i b);",
        "description": "Divide packed 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epi64",
        "full_name": "__m512i _mm512_rem_epi64(__m512i a, __m512i b);",
        "description": "Divide packed 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm_rem_epu64",
        "full_name": "__m128i _mm_rem_epu64(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epu64",
        "full_name": "__m256i _mm256_rem_epu64(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epu64",
        "full_name": "__m512i _mm512_rem_epu64(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 64-bit integers in \"dst\"."
    },
    {
        "name": "_mm_rem_epi8",
        "full_name": "__m128i _mm_rem_epi8(__m128i a, __m128i b);",
        "description": "Divide packed 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epi8",
        "full_name": "__m256i _mm256_rem_epi8(__m256i a, __m256i b);",
        "description": "Divide packed 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epi8",
        "full_name": "__m512i _mm512_rem_epi8(__m512i a, __m512i b);",
        "description": "Divide packed 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm_rem_epu8",
        "full_name": "__m128i _mm_rem_epu8(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_rem_epu8",
        "full_name": "__m256i _mm256_rem_epu8(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm512_rem_epu8",
        "full_name": "__m512i _mm512_rem_epu8(__m512i a, __m512i b);",
        "description": "Divide packed unsigned 8-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 8-bit integers in \"dst\"."
    },
    {
        "name": "_mm_abs_epi16",
        "full_name": "__m128i _mm_abs_epi16(__m128i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_mask_abs_epi16",
        "full_name": "__m128i _mm_mask_abs_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_abs_epi16",
        "full_name": "__m128i _mm_maskz_abs_epi16(__mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_abs_epi16",
        "full_name": "__m256i _mm256_abs_epi16(__m256i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm256_mask_abs_epi16",
        "full_name": "__m256i _mm256_mask_abs_epi16(__m256i src, __mmask16 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_abs_epi16",
        "full_name": "__m256i _mm256_maskz_abs_epi16(__mmask16 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_abs_epi16",
        "full_name": "__m512i _mm512_abs_epi16(__m512i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm512_mask_abs_epi16",
        "full_name": "__m512i _mm512_mask_abs_epi16(__m512i src, __mmask32 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_abs_epi16",
        "full_name": "__m512i _mm512_maskz_abs_epi16(__mmask32 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_abs_epi32",
        "full_name": "__m128i _mm_abs_epi32(__m128i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_mask_abs_epi32",
        "full_name": "__m128i _mm_mask_abs_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_abs_epi32",
        "full_name": "__m128i _mm_maskz_abs_epi32(__mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_abs_epi32",
        "full_name": "__m256i _mm256_abs_epi32(__m256i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm256_mask_abs_epi32",
        "full_name": "__m256i _mm256_mask_abs_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_abs_epi32",
        "full_name": "__m256i _mm256_maskz_abs_epi32(__mmask8 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_abs_epi32",
        "full_name": "__m512i _mm512_abs_epi32(__m512i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm512_mask_abs_epi32",
        "full_name": "__m512i _mm512_mask_abs_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_abs_epi32",
        "full_name": "__m512i _mm512_maskz_abs_epi32(__mmask16 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_abs_epi64",
        "full_name": "__m128i _mm_abs_epi64(__m128i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_mask_abs_epi64",
        "full_name": "__m128i _mm_mask_abs_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_abs_epi64",
        "full_name": "__m128i _mm_maskz_abs_epi64(__mmask8 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_abs_epi64",
        "full_name": "__m256i _mm256_abs_epi64(__m256i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm256_mask_abs_epi64",
        "full_name": "__m256i _mm256_mask_abs_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_abs_epi64",
        "full_name": "__m256i _mm256_maskz_abs_epi64(__mmask8 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_abs_epi64",
        "full_name": "__m512i _mm512_abs_epi64(__m512i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm512_mask_abs_epi64",
        "full_name": "__m512i _mm512_mask_abs_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_abs_epi64",
        "full_name": "__m512i _mm512_maskz_abs_epi64(__mmask8 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 64-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_abs_epi8",
        "full_name": "__m128i _mm_abs_epi8(__m128i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_mask_abs_epi8",
        "full_name": "__m128i _mm_mask_abs_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_abs_epi8",
        "full_name": "__m128i _mm_maskz_abs_epi8(__mmask16 k, __m128i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_abs_epi8",
        "full_name": "__m256i _mm256_abs_epi8(__m256i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm256_mask_abs_epi8",
        "full_name": "__m256i _mm256_mask_abs_epi8(__m256i src, __mmask32 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_abs_epi8",
        "full_name": "__m256i _mm256_maskz_abs_epi8(__mmask32 k, __m256i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_abs_epi8",
        "full_name": "__m512i _mm512_mask_abs_epi8(__m512i src, __mmask64 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_abs_epi8",
        "full_name": "__m512i _mm512_maskz_abs_epi8(__mmask64 k, __m512i a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_abs_pd",
        "full_name": "__m512d _mm512_abs_pd(__m512d v2);",
        "description": "Finds the absolute value of each packed double-precision (64-bit) floating-point element in \"v2\", storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_abs_pd",
        "full_name": "__m512d _mm512_mask_abs_pd(__m512d src, __mmask8 k, __m512d v2);",
        "description": "Finds the absolute value of each packed double-precision (64-bit) floating-point element in \"v2\", storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_abs_pi16",
        "full_name": "__m64 _mm_abs_pi16(__m64 a);",
        "description": "Compute the absolute value of packed signed 16-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_abs_pi32",
        "full_name": "__m64 _mm_abs_pi32(__m64 a);",
        "description": "Compute the absolute value of packed signed 32-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm_abs_pi8",
        "full_name": "__m64 _mm_abs_pi8(__m64 a);",
        "description": "Compute the absolute value of packed signed 8-bit integers in \"a\", and store the unsigned results in \"dst\"."
    },
    {
        "name": "_mm512_abs_ps",
        "full_name": "__m512 _mm512_abs_ps(__m512 v2);",
        "description": "Finds the absolute value of each packed single-precision (32-bit) floating-point element in \"v2\", storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_abs_ps",
        "full_name": "__m512 _mm512_mask_abs_ps(__m512 src, __mmask16 k, __m512 v2);",
        "description": "Finds the absolute value of each packed single-precision (32-bit) floating-point element in \"v2\", storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rint_pd",
        "full_name": "__m512d _mm512_rint_pd(__m512d a);",
        "description": "Rounds the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest even integer value and stores the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rint_pd",
        "full_name": "__m512d _mm512_mask_rint_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Rounds the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest even integer value and stores the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_rint_ps",
        "full_name": "__m512 _mm512_rint_ps(__m512 a);",
        "description": "Rounds the packed single-precision (32-bit) floating-point elements in \"a\" to the nearest even integer value and stores the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_rint_ps",
        "full_name": "__m512 _mm512_mask_rint_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Rounds the packed single-precision (32-bit) floating-point elements in \"a\" to the nearest even integer value and stores the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_tzcnt_32",
        "full_name": "int _mm_tzcnt_32(unsigned int a);",
        "description": "Count the number of trailing zero bits in unsigned 32-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_tzcnt_64",
        "full_name": "__int64 _mm_tzcnt_64(unsigned __int64 a);",
        "description": "Count the number of trailing zero bits in unsigned 64-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_tzcnt_u32",
        "full_name": "unsigned int _tzcnt_u32(unsigned int a);",
        "description": "Count the number of trailing zero bits in unsigned 32-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_tzcnt_u64",
        "full_name": "unsigned __int64 _tzcnt_u64(unsigned __int64 a);",
        "description": "Count the number of trailing zero bits in unsigned 64-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_reduce_pd",
        "full_name": "__m128d _mm_reduce_pd(__m128d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_reduce_pd",
        "full_name": "__m128d _mm_mask_reduce_pd(__m128d src, __mmask8 k, __m128d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_reduce_pd",
        "full_name": "__m128d _mm_maskz_reduce_pd(__mmask8 k, __m128d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_reduce_pd",
        "full_name": "__m256d _mm256_reduce_pd(__m256d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm256_mask_reduce_pd",
        "full_name": "__m256d _mm256_mask_reduce_pd(__m256d src, __mmask8 k, __m256d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_reduce_pd",
        "full_name": "__m256d _mm256_maskz_reduce_pd(__mmask8 k, __m256d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_reduce_pd",
        "full_name": "__m512d _mm512_reduce_pd(__m512d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm512_mask_reduce_pd",
        "full_name": "__m512d _mm512_mask_reduce_pd(__m512d src, __mmask8 k, __m512d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_maskz_reduce_pd",
        "full_name": "__m512d _mm512_maskz_reduce_pd(__mmask8 k, __m512d a, int imm8);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_reduce_round_pd",
        "full_name": "__m512d _mm512_reduce_round_pd(__m512d a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_mask_reduce_round_pd",
        "full_name": "__m512d _mm512_mask_reduce_round_pd(__m512d src, __mmask8 k, __m512d a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_maskz_reduce_round_pd",
        "full_name": "__m512d _mm512_maskz_reduce_round_pd(__mmask8 k, __m512d a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed double-precision (64-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_reduce_ps",
        "full_name": "__m128 _mm_reduce_ps(__m128 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_reduce_ps",
        "full_name": "__m128 _mm_mask_reduce_ps(__m128 src, __mmask8 k, __m128 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_reduce_ps",
        "full_name": "__m128 _mm_maskz_reduce_ps(__mmask8 k, __m128 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_reduce_ps",
        "full_name": "__m256 _mm256_reduce_ps(__m256 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm256_mask_reduce_ps",
        "full_name": "__m256 _mm256_mask_reduce_ps(__m256 src, __mmask8 k, __m256 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_reduce_ps",
        "full_name": "__m256 _mm256_maskz_reduce_ps(__mmask8 k, __m256 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_reduce_ps",
        "full_name": "__m512 _mm512_reduce_ps(__m512 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm512_mask_reduce_ps",
        "full_name": "__m512 _mm512_mask_reduce_ps(__m512 src, __mmask16 k, __m512 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_maskz_reduce_ps",
        "full_name": "__m512 _mm512_maskz_reduce_ps(__mmask16 k, __m512 a, int imm8);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_reduce_round_ps",
        "full_name": "__m512 _mm512_reduce_round_ps(__m512 a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_mask_reduce_round_ps",
        "full_name": "__m512 _mm512_mask_reduce_round_ps(__m512 src, __mmask16 k, __m512 a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_maskz_reduce_round_ps",
        "full_name": "__m512 _mm512_maskz_reduce_round_ps(__mmask16 k, __m512 a, int imm8, int sae);",
        "description": "Extract the reduced argument of packed single-precision (32-bit) floating-point elements in \"a\" by the number of bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_reduce_sd",
        "full_name": "__m128d _mm_reduce_sd(__m128d a, __m128d b, int imm8);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_reduce_sd",
        "full_name": "__m128d _mm_mask_reduce_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_maskz_reduce_sd",
        "full_name": "__m128d _mm_maskz_reduce_sd(__mmask8 k, __m128d a, __m128d b, int imm8);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_reduce_round_sd",
        "full_name": "__m128d _mm_reduce_round_sd(__m128d a, __m128d b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_mask_reduce_round_sd",
        "full_name": "__m128d _mm_mask_reduce_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_maskz_reduce_round_sd",
        "full_name": "__m128d _mm_maskz_reduce_round_sd(__mmask8 k, __m128d a, __m128d b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower double-precision (64-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_reduce_ss",
        "full_name": "__m128 _mm_reduce_ss(__m128 a, __m128 b, int imm8);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_reduce_ss",
        "full_name": "__m128 _mm_mask_reduce_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_maskz_reduce_ss",
        "full_name": "__m128 _mm_maskz_reduce_ss(__mmask8 k, __m128 a, __m128 b, int imm8);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_reduce_round_ss",
        "full_name": "__m128 _mm_reduce_round_ss(__m128 a, __m128 b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_mask_reduce_round_ss",
        "full_name": "__m128 _mm_mask_reduce_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_maskz_reduce_round_ss",
        "full_name": "__m128 _mm_maskz_reduce_round_ss(__mmask8 k, __m128 a, __m128 b, int imm8, int sae);",
        "description": "Extract the reduced argument of the lower single-precision (32-bit) floating-point element in \"b\" by the number of bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_aesdeclast_si128",
        "full_name": "__m128i _mm_aesdeclast_si128(__m128i a, __m128i RoundKey);",
        "description": "Perform the last round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_aesenclast_si128",
        "full_name": "__m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey);",
        "description": "Perform the last round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_aesdec_si128",
        "full_name": "__m128i _mm_aesdec_si128(__m128i a, __m128i RoundKey);",
        "description": "Perform one round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_aesenc_si128",
        "full_name": "__m128i _mm_aesenc_si128(__m128i a, __m128i RoundKey);",
        "description": "Perform one round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_aesimc_si128",
        "full_name": "__m128i _mm_aesimc_si128(__m128i a);",
        "description": "Perform the InvMixColumns transformation on \"a\" and store the result in \"dst\"."
    },
    {
        "name": "_mm_aeskeygenassist_si128",
        "full_name": "__m128i _mm_aeskeygenassist_si128(__m128i a, const int imm8);",
        "description": "Assist in expanding the AES cipher key by computing steps towards generating a round key for encryption cipher using data from \"a\" and an 8-bit round constant specified in \"imm8\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_acos_pd",
        "full_name": "__m128d _mm_acos_pd(__m128d a);",
        "description": "Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_acos_pd",
        "full_name": "__m256d _mm256_acos_pd(__m256d a);",
        "description": "Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_acos_pd",
        "full_name": "__m512d _mm512_acos_pd(__m512d a);",
        "description": "Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_acos_pd",
        "full_name": "__m512d _mm512_mask_acos_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_acos_ps",
        "full_name": "__m128 _mm_acos_ps(__m128 a);",
        "description": "Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_acos_ps",
        "full_name": "__m256 _mm256_acos_ps(__m256 a);",
        "description": "Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_acos_ps",
        "full_name": "__m512 _mm512_acos_ps(__m512 a);",
        "description": "Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_acos_ps",
        "full_name": "__m512 _mm512_mask_acos_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_acosh_pd",
        "full_name": "__m128d _mm_acosh_pd(__m128d a);",
        "description": "Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_acosh_pd",
        "full_name": "__m256d _mm256_acosh_pd(__m256d a);",
        "description": "Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_acosh_pd",
        "full_name": "__m512d _mm512_acosh_pd(__m512d a);",
        "description": "Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_acosh_pd",
        "full_name": "__m512d _mm512_mask_acosh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_acosh_ps",
        "full_name": "__m128 _mm_acosh_ps(__m128 a);",
        "description": "Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_acosh_ps",
        "full_name": "__m256 _mm256_acosh_ps(__m256 a);",
        "description": "Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_acosh_ps",
        "full_name": "__m512 _mm512_acosh_ps(__m512 a);",
        "description": "Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_acosh_ps",
        "full_name": "__m512 _mm512_mask_acosh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_asin_pd",
        "full_name": "__m128d _mm_asin_pd(__m128d a);",
        "description": "Compute the inverse sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_asin_pd",
        "full_name": "__m256d _mm256_asin_pd(__m256d a);",
        "description": "Compute the inverse sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_asin_pd",
        "full_name": "__m512d _mm512_asin_pd(__m512d a);",
        "description": "Compute the inverse sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_asin_pd",
        "full_name": "__m512d _mm512_mask_asin_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_asin_ps",
        "full_name": "__m128 _mm_asin_ps(__m128 a);",
        "description": "Compute the inverse sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_asin_ps",
        "full_name": "__m256 _mm256_asin_ps(__m256 a);",
        "description": "Compute the inverse sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_asin_ps",
        "full_name": "__m512 _mm512_asin_ps(__m512 a);",
        "description": "Compute the inverse sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_asin_ps",
        "full_name": "__m512 _mm512_mask_asin_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_asinh_pd",
        "full_name": "__m128d _mm_asinh_pd(__m128d a);",
        "description": "Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_asinh_pd",
        "full_name": "__m256d _mm256_asinh_pd(__m256d a);",
        "description": "Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_asinh_pd",
        "full_name": "__m512d _mm512_asinh_pd(__m512d a);",
        "description": "Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_asinh_pd",
        "full_name": "__m512d _mm512_mask_asinh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_asinh_ps",
        "full_name": "__m128 _mm_asinh_ps(__m128 a);",
        "description": "Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_asinh_ps",
        "full_name": "__m256 _mm256_asinh_ps(__m256 a);",
        "description": "Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_asinh_ps",
        "full_name": "__m512 _mm512_asinh_ps(__m512 a);",
        "description": "Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_asinh_ps",
        "full_name": "__m512 _mm512_mask_asinh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_adds_epu16",
        "full_name": "__m128i _mm_maskz_adds_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_adds_epu16",
        "full_name": "__m256i _mm256_mask_adds_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_adds_epu16",
        "full_name": "__m256i _mm256_maskz_adds_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_adds_epu16",
        "full_name": "__m512i _mm512_mask_adds_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_adds_epu16",
        "full_name": "__m512i _mm512_maskz_adds_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_adds_epu8",
        "full_name": "__m128i _mm_mask_adds_epu8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_ps",
        "full_name": "__m128 _mm_mask_unpacklo_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_ps",
        "full_name": "__m128 _mm_maskz_unpacklo_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_unpacklo_ps",
        "full_name": "__m128 _mm_unpacklo_ps(__m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpacklo_ps",
        "full_name": "__m256 _mm256_mask_unpacklo_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_ps",
        "full_name": "__m256 _mm256_maskz_unpacklo_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpacklo_ps",
        "full_name": "__m256 _mm256_unpacklo_ps(__m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpacklo_ps",
        "full_name": "__m512 _mm512_mask_unpacklo_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_ps",
        "full_name": "__m512 _mm512_maskz_unpacklo_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpacklo_ps",
        "full_name": "__m512 _mm512_unpacklo_ps(__m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_roundscale_pd",
        "full_name": "__m128d _mm_mask_roundscale_pd(__m128d src, __mmask8 k, __m128d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_roundscale_pd",
        "full_name": "__m128d _mm_maskz_roundscale_pd(__mmask8 k, __m128d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_roundscale_pd",
        "full_name": "__m128d _mm_roundscale_pd(__m128d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm256_mask_roundscale_pd",
        "full_name": "__m256d _mm256_mask_roundscale_pd(__m256d src, __mmask8 k, __m256d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_roundscale_pd",
        "full_name": "__m256d _mm256_maskz_roundscale_pd(__mmask8 k, __m256d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_roundscale_pd",
        "full_name": "__m256d _mm256_roundscale_pd(__m256d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm512_mask_roundscale_pd",
        "full_name": "__m512d _mm512_mask_roundscale_pd(__m512d src, __mmask8 k, __m512d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_maskz_roundscale_pd",
        "full_name": "__m512d _mm512_maskz_roundscale_pd(__mmask8 k, __m512d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_roundscale_pd",
        "full_name": "__m512d _mm512_roundscale_pd(__m512d a, int imm8);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_roundscale_ps",
        "full_name": "__m128 _mm_mask_roundscale_ps(__m128 src, __mmask8 k, __m128 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_roundscale_ps",
        "full_name": "__m128 _mm_maskz_roundscale_ps(__mmask8 k, __m128 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_roundscale_ps",
        "full_name": "__m128 _mm_roundscale_ps(__m128 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm256_mask_roundscale_ps",
        "full_name": "__m256 _mm256_mask_roundscale_ps(__m256 src, __mmask8 k, __m256 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_roundscale_ps",
        "full_name": "__m256 _mm256_maskz_roundscale_ps(__mmask8 k, __m256 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_roundscale_ps",
        "full_name": "__m256 _mm256_roundscale_ps(__m256 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm512_mask_roundscale_ps",
        "full_name": "__m512 _mm512_mask_roundscale_ps(__m512 src, __mmask16 k, __m512 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_maskz_roundscale_ps",
        "full_name": "__m512 _mm512_maskz_roundscale_ps(__mmask16 k, __m512 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_roundscale_ps",
        "full_name": "__m512 _mm512_roundscale_ps(__m512 a, int imm8);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_roundscale_sd",
        "full_name": "__m128d _mm_roundscale_sd(__m128d a, __m128d b, const int imm8);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_maskz_adds_epu8",
        "full_name": "__m128i _mm_maskz_adds_epu8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_adds_epu8",
        "full_name": "__m256i _mm256_mask_adds_epu8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_adds_epu8",
        "full_name": "__m256i _mm256_maskz_adds_epu8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_adds_epu8",
        "full_name": "__m512i _mm512_mask_adds_epu8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_adds_epu8",
        "full_name": "__m512i _mm512_maskz_adds_epu8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_adds_epi8",
        "full_name": "__m512i _mm512_mask_adds_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_adds_epi8",
        "full_name": "__m512i _mm512_maskz_adds_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_roundscale_sd",
        "full_name": "__m128d _mm_mask_roundscale_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, const int imm8);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_maskz_roundscale_sd",
        "full_name": "__m128d _mm_maskz_roundscale_sd(__mmask8 k, __m128d a, __m128d b, const int imm8);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_urem_epi32",
        "full_name": "__m128i _mm_urem_epi32(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm256_urem_epi32",
        "full_name": "__m256i _mm256_urem_epi32(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the remainders as packed unsigned 32-bit integers in \"dst\"."
    },
    {
        "name": "_mm_i64gather_ps",
        "full_name": "__m128 _mm_i64gather_ps(float const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64gather_ps",
        "full_name": "__m128 _mm256_i64gather_ps(float const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64gather_pd",
        "full_name": "__m128d _mm_i64gather_pd(double const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64gather_pd",
        "full_name": "__m256d _mm256_i64gather_pd(double const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64gather_pd",
        "full_name": "__m512d _mm512_i64gather_pd(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64gather_pd",
        "full_name": "__m128d _mm_mask_i64gather_pd(__m128d src, double const * base_addr, __m128i vindex, __m128d mask, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i64gather_pd",
        "full_name": "__m128d _mm_mmask_i64gather_pd(__m128d src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i64gather_pd",
        "full_name": "__m256d _mm256_mmask_i64gather_pd(__m256d src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i64gather_ps",
        "full_name": "__m128 _mm_mmask_i64gather_ps(__m128 src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64gather_ps",
        "full_name": "__m128 _mm_mask_i64gather_ps(__m128 src, float const * base_addr, __m128i vindex, __m128 mask, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64gather_ps",
        "full_name": "__m128 _mm256_mask_i64gather_ps(__m128 src, float const * base_addr, __m256i vindex, __m128 mask, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64gather_ps",
        "full_name": "__m256 _mm512_i64gather_ps(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64gather_pd",
        "full_name": "__m256d _mm256_mask_i64gather_pd(__m256d src, double const * base_addr, __m256i vindex, __m256d mask, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64gather_pd",
        "full_name": "__m512d _mm512_mask_i64gather_pd(__m512d src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i64gather_ps",
        "full_name": "__m128 _mm256_mmask_i64gather_ps(__m128 src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64gather_ps",
        "full_name": "__m256 _mm512_mask_i64gather_ps(__m256 src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_roundscale_round_pd",
        "full_name": "__m512d _mm512_mask_roundscale_round_pd(__m512d src, __mmask8 k, __m512d a, int imm8, int sae);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_maskz_roundscale_round_pd",
        "full_name": "__m512d _mm512_maskz_roundscale_round_pd(__mmask8 k, __m512d a, int imm8, int sae);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_roundscale_round_pd",
        "full_name": "__m512d _mm512_roundscale_round_pd(__m512d a, int imm8, int sae);",
        "description": "Round packed double-precision (64-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_mask_roundscale_round_ps",
        "full_name": "__m512 _mm512_mask_roundscale_round_ps(__m512 src, __mmask16 k, __m512 a, int imm8, int sae);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_maskz_roundscale_round_ps",
        "full_name": "__m512 _mm512_maskz_roundscale_round_ps(__mmask16 k, __m512 a, int imm8, int sae);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_roundscale_round_ps",
        "full_name": "__m512 _mm512_roundscale_round_ps(__m512 a, int imm8, int sae);",
        "description": "Round packed single-precision (32-bit) floating-point elements in \"a\" to the number of fraction bits specified by \"imm8\", and store the results in \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_mask_roundscale_round_sd",
        "full_name": "__m128d _mm_mask_roundscale_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, const int imm8, const int sae);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_maskz_roundscale_round_sd",
        "full_name": "__m128d _mm_maskz_roundscale_round_sd(__mmask8 k, __m128d a, __m128d b, const int imm8, const int sae);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_roundscale_round_sd",
        "full_name": "__m128d _mm_roundscale_round_sd(__m128d a, __m128d b, const int imm8, const int sae);",
        "description": "Round the lower double-precision (64-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_mask_roundscale_round_ss",
        "full_name": "__m128 _mm_mask_roundscale_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_maskz_roundscale_round_ss",
        "full_name": "__m128 _mm_maskz_roundscale_round_ss(__mmask8 k, __m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm_roundscale_round_ss",
        "full_name": "__m128 _mm_roundscale_round_ss(__m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note][sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundpd_epi32",
        "full_name": "__m256i _mm512_cvtt_roundpd_epi32(__m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundpd_epi32",
        "full_name": "__m256i _mm512_mask_cvtt_roundpd_epi32(__m256i src, __mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundpd_epi32",
        "full_name": "__m256i _mm512_maskz_cvtt_roundpd_epi32(__mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundpd_epi64",
        "full_name": "__m512i _mm512_cvtt_roundpd_epi64(__m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundpd_epi64",
        "full_name": "__m512i _mm512_mask_cvtt_roundpd_epi64(__m512i src, __mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundpd_epi64",
        "full_name": "__m512i _mm512_maskz_cvtt_roundpd_epi64(__mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundpd_epu32",
        "full_name": "__m256i _mm512_cvtt_roundpd_epu32(__m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundpd_epu32",
        "full_name": "__m256i _mm512_mask_cvtt_roundpd_epu32(__m256i src, __mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundpd_epu32",
        "full_name": "__m256i _mm512_maskz_cvtt_roundpd_epu32(__mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundpd_epu64",
        "full_name": "__m512i _mm512_cvtt_roundpd_epu64(__m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundpd_epu64",
        "full_name": "__m512i _mm512_mask_cvtt_roundpd_epu64(__m512i src, __mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundpd_epu64",
        "full_name": "__m512i _mm512_maskz_cvtt_roundpd_epu64(__mmask8 k, __m512d a, int sae);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundps_epi32",
        "full_name": "__m512i _mm512_cvtt_roundps_epi32(__m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\".  [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundps_epi32",
        "full_name": "__m512i _mm512_mask_cvtt_roundps_epi32(__m512i src, __mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).  [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundps_epi32",
        "full_name": "__m512i _mm512_maskz_cvtt_roundps_epi32(__mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundps_epi64",
        "full_name": "__m512i _mm512_cvtt_roundps_epi64(__m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundps_epi64",
        "full_name": "__m512i _mm512_mask_cvtt_roundps_epi64(__m512i src, __mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundps_epi64",
        "full_name": "__m512i _mm512_maskz_cvtt_roundps_epi64(__mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundps_epu32",
        "full_name": "__m512i _mm512_cvtt_roundps_epu32(__m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\".  [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundps_epu32",
        "full_name": "__m512i _mm512_mask_cvtt_roundps_epu32(__m512i src, __mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).   [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundps_epu32",
        "full_name": "__m512i _mm512_maskz_cvtt_roundps_epu32(__mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note]"
    },
    {
        "name": "_mm512_cvtt_roundps_epu64",
        "full_name": "__m512i _mm512_cvtt_roundps_epu64(__m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cvtt_roundps_epu64",
        "full_name": "__m512i _mm512_mask_cvtt_roundps_epu64(__m512i src, __mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_maskz_cvtt_roundps_epu64",
        "full_name": "__m512i _mm512_maskz_cvtt_roundps_epu64(__mmask8 k, __m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers with truncation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm512_kxnor",
        "full_name": "__mmask16 _mm512_kxnor(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise XNOR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_mm512_mask2int",
        "full_name": "int _mm512_mask2int(__mmask16 k1);",
        "description": "Converts bit mask \"k1\" into an integer value, storing the results in \"dst\"."
    },
    {
        "name": "_mm_erf_ps",
        "full_name": "__m128 _mm_erf_ps(__m128 a);",
        "description": "Compute the error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erf_ps",
        "full_name": "__m256 _mm256_erf_ps(__m256 a);",
        "description": "Compute the error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erf_ps",
        "full_name": "__m512 _mm512_erf_ps(__m512 a);",
        "description": "Compute the error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_erf_pd",
        "full_name": "__m128d _mm_erf_pd(__m128d a);",
        "description": "Compute the error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erf_pd",
        "full_name": "__m256d _mm256_erf_pd(__m256d a);",
        "description": "Compute the error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erf_pd",
        "full_name": "__m512d _mm512_erf_pd(__m512d a);",
        "description": "Compute the error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_erfc_ps",
        "full_name": "__m128 _mm_erfc_ps(__m128 a);",
        "description": "Compute the complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erfc_ps",
        "full_name": "__m256 _mm256_erfc_ps(__m256 a);",
        "description": "Compute the complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erfc_ps",
        "full_name": "__m512 _mm512_erfc_ps(__m512 a);",
        "description": "Compute the complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_erfc_pd",
        "full_name": "__m128d _mm_erfc_pd(__m128d a);",
        "description": "Compute the complementary error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erfc_pd",
        "full_name": "__m256d _mm256_erfc_pd(__m256d a);",
        "description": "Compute the complementary error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erfc_pd",
        "full_name": "__m512d _mm512_erfc_pd(__m512d a);",
        "description": "Compute the complementary error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_erf_ps",
        "full_name": "__m512 _mm512_mask_erf_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_erf_pd",
        "full_name": "__m512d _mm512_mask_erf_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_erfc_ps",
        "full_name": "__m512 _mm512_mask_erfc_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_erfc_pd",
        "full_name": "__m512d _mm512_mask_erfc_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the complementary error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtps_epi32",
        "full_name": "__m128i _mm_mask_cvtps_epi32(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtps_epi32",
        "full_name": "__m128i _mm_maskz_cvtps_epi32(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtps_epi32",
        "full_name": "__m256i _mm256_cvtps_epi32(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtps_epi32",
        "full_name": "__m256i _mm256_mask_cvtps_epi32(__m256i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtps_epi32",
        "full_name": "__m256i _mm256_maskz_cvtps_epi32(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtps_epi32",
        "full_name": "__m512i _mm512_cvtps_epi32(__m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtps_epi32",
        "full_name": "__m512i _mm512_mask_cvtps_epi32(__m512i src, __mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtps_epi32",
        "full_name": "__m512i _mm512_maskz_cvtps_epi32(__mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepi8_epi32",
        "full_name": "__m128i _mm_mask_cvtepi8_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 4 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi8_epi32",
        "full_name": "__m128i _mm_maskz_cvtepi8_epi32(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 4 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi8_epi32",
        "full_name": "__m256i _mm256_cvtepi8_epi32(__m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi8_epi32",
        "full_name": "__m256i _mm256_mask_cvtepi8_epi32(__m256i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi8_epi32",
        "full_name": "__m256i _mm256_maskz_cvtepi8_epi32(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi8_epi32",
        "full_name": "__m512i _mm512_cvtepi8_epi32(__m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi8_epi32",
        "full_name": "__m512i _mm512_mask_cvtepi8_epi32(__m512i src, __mmask16 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi8_epi32",
        "full_name": "__m512i _mm512_maskz_cvtepi8_epi32(__mmask16 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepi8_epi64",
        "full_name": "__m128i _mm_mask_cvtepi8_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 2 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi8_epi64",
        "full_name": "__m128i _mm_maskz_cvtepi8_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 2 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtepi8_epi64",
        "full_name": "__m256i _mm256_mask_cvtepi8_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi8_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepi8_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi8_epi16",
        "full_name": "__m128i _mm_cvtepi8_epi16(__m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi8_epi16",
        "full_name": "__m128i _mm_mask_cvtepi8_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi8_epi16",
        "full_name": "__m128i _mm_maskz_cvtepi8_epi16(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi8_epi16",
        "full_name": "__m256i _mm256_cvtepi8_epi16(__m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi8_epi16",
        "full_name": "__m256i _mm256_mask_cvtepi8_epi16(__m256i src, __mmask16 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi8_epi16",
        "full_name": "__m256i _mm256_maskz_cvtepi8_epi16(__mmask16 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi8_epi16",
        "full_name": "__m512i _mm512_cvtepi8_epi16(__m256i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi8_epi16",
        "full_name": "__m512i _mm512_mask_cvtepi8_epi16(__m512i src, __mmask32 k, __m256i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi8_epi16",
        "full_name": "__m512i _mm512_maskz_cvtepi8_epi16(__mmask32 k, __m256i a);",
        "description": "Sign extend packed 8-bit integers in \"a\" to packed 16-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi8_epi64",
        "full_name": "__m256i _mm256_cvtepi8_epi64(__m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtepi8_epi64",
        "full_name": "__m512i _mm512_cvtepi8_epi64(__m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi8_epi64",
        "full_name": "__m512i _mm512_mask_cvtepi8_epi64(__m512i src, __mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi8_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepi8_epi64(__mmask8 k, __m128i a);",
        "description": "Sign extend packed 8-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepu16_epi32",
        "full_name": "__m128i _mm_mask_cvtepu16_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu16_epi32",
        "full_name": "__m128i _mm_maskz_cvtepu16_epi32(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu16_epi32",
        "full_name": "__m256i _mm256_cvtepu16_epi32(__m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepu16_epi32",
        "full_name": "__m256i _mm256_mask_cvtepu16_epi32(__m256i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu16_epi32",
        "full_name": "__m256i _mm256_maskz_cvtepu16_epi32(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu16_epi32",
        "full_name": "__m512i _mm512_cvtepu16_epi32(__m256i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu16_epi32",
        "full_name": "__m512i _mm512_mask_cvtepu16_epi32(__m512i src, __mmask16 k, __m256i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu16_epi32",
        "full_name": "__m512i _mm512_maskz_cvtepu16_epi32(__mmask16 k, __m256i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepi64_ps",
        "full_name": "__m128 _mm_cvtepi64_ps(__m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepi64_ps",
        "full_name": "__m128 _mm_mask_cvtepi64_ps(__m128 src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepi64_ps",
        "full_name": "__m128 _mm_maskz_cvtepi64_ps(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepi64_ps",
        "full_name": "__m128 _mm256_cvtepi64_ps(__m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_ps",
        "full_name": "__m128 _mm256_mask_cvtepi64_ps(__m128 src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepi64_ps",
        "full_name": "__m128 _mm256_maskz_cvtepi64_ps(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepi64_ps",
        "full_name": "__m256 _mm512_cvtepi64_ps(__m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_pd",
        "full_name": "__m512d _mm512_mask_cvtepi64_pd(__m512d src, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi64_pd",
        "full_name": "__m512d _mm512_maskz_cvtepi64_pd(__mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtepi64_ps",
        "full_name": "__m256 _mm512_mask_cvtepi64_ps(__m256 src, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepi64_ps",
        "full_name": "__m256 _mm512_maskz_cvtepi64_ps(__mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtps_epi64",
        "full_name": "__m128i _mm_cvtps_epi64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cvtps_epi64",
        "full_name": "__m256i _mm256_cvtps_epi64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtps_epi64",
        "full_name": "__m512i _mm512_cvtps_epi64(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtps_epi64",
        "full_name": "__m128i _mm_mask_cvtps_epi64(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtps_epi64",
        "full_name": "__m128i _mm_maskz_cvtps_epi64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cvtps_epi64",
        "full_name": "__m256i _mm256_mask_cvtps_epi64(__m256i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtps_epi64",
        "full_name": "__m256i _mm256_maskz_cvtps_epi64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cvtps_epi64",
        "full_name": "__m512i _mm512_mask_cvtps_epi64(__m512i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtps_epi64",
        "full_name": "__m512i _mm512_maskz_cvtps_epi64(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtps_epu32",
        "full_name": "__m128i _mm_cvtps_epu32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtps_epu32",
        "full_name": "__m128i _mm_mask_cvtps_epu32(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtps_epu32",
        "full_name": "__m128i _mm_maskz_cvtps_epu32(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtps_epu32",
        "full_name": "__m256i _mm256_cvtps_epu32(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtps_epu32",
        "full_name": "__m256i _mm256_mask_cvtps_epu32(__m256i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtps_epu32",
        "full_name": "__m256i _mm256_maskz_cvtps_epu32(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtps_epu32",
        "full_name": "__m512i _mm512_cvtps_epu32(__m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtps_epu32",
        "full_name": "__m512i _mm512_mask_cvtps_epu32(__m512i src, __mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtps_epu32",
        "full_name": "__m512i _mm512_maskz_cvtps_epu32(__mmask16 k, __m512 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 32-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtpd_ps",
        "full_name": "__m128 _mm256_maskz_cvtpd_ps(__mmask8 k, __m256d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_ps",
        "full_name": "__m256 _mm512_cvtpd_ps(__m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpd_ps",
        "full_name": "__m256 _mm512_mask_cvtpd_ps(__m256 src, __mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtpd_ps",
        "full_name": "__m256 _mm512_maskz_cvtpd_ps(__mmask8 k, __m512d a);",
        "description": "Convert packed double-precision (64-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepi64_storeu_epi16",
        "full_name": "void _mm_mask_cvtepi64_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_storeu_epi16",
        "full_name": "void _mm256_mask_cvtepi64_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_storeu_epi16",
        "full_name": "void _mm512_mask_cvtepi64_storeu_epi16(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtepi64_storeu_epi32",
        "full_name": "void _mm_mask_cvtepi64_storeu_epi32(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_storeu_epi32",
        "full_name": "void _mm256_mask_cvtepi64_storeu_epi32(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_storeu_epi32",
        "full_name": "void _mm512_mask_cvtepi64_storeu_epi32(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtepi64_storeu_epi8",
        "full_name": "void _mm_mask_cvtepi64_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtepi64_storeu_epi8",
        "full_name": "void _mm256_mask_cvtepi64_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtepi64_storeu_epi8",
        "full_name": "void _mm512_mask_cvtepi64_storeu_epi8(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed 64-bit integers in \"a\" to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_maskz_cvtph_ps",
        "full_name": "__m128 _mm_maskz_cvtph_ps(__mmask8 k, __m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtph_ps",
        "full_name": "__m256 _mm256_cvtph_ps(__m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtph_ps",
        "full_name": "__m256 _mm256_mask_cvtph_ps(__m256 src, __mmask8 k, __m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtph_ps",
        "full_name": "__m256 _mm256_maskz_cvtph_ps(__mmask8 k, __m128i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtph_ps",
        "full_name": "__m512 _mm512_cvtph_ps(__m256i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtph_ps",
        "full_name": "__m512 _mm512_mask_cvtph_ps(__m512 src, __mmask16 k, __m256i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtph_ps",
        "full_name": "__m512 _mm512_maskz_cvtph_ps(__mmask16 k, __m256i a);",
        "description": "Convert packed half-precision (16-bit) floating-point elements in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpd_pslo",
        "full_name": "__m512 _mm512_cvtpd_pslo(__m512d v2);",
        "description": "Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in \"v2\" to single-precision (32-bit) floating-point elements and stores them in \"dst\". The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0."
    },
    {
        "name": "_mm512_mask_cvtpd_pslo",
        "full_name": "__m512 _mm512_mask_cvtpd_pslo(__m512 src, __mmask8 k, __m512d v2);",
        "description": "Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in \"v2\" to single-precision (32-bit) floating-point elements and stores them in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0."
    },
    {
        "name": "_mm256_permutex2var_pd",
        "full_name": "__m256d _mm256_permutex2var_pd(__m256d a, __m256i idx, __m256d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex2var_pd",
        "full_name": "__m256d _mm256_mask_permutex2var_pd(__m256d a, __mmask8 k, __m256i idx, __m256d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_pd",
        "full_name": "__m256d _mm256_maskz_permutex2var_pd(__mmask8 k, __m256d a, __m256i idx, __m256d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_pd",
        "full_name": "__m256d _mm256_mask2_permutex2var_pd(__m256d a, __m256i idx, __mmask8 k, __m256d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex2var_pd",
        "full_name": "__m512d _mm512_permutex2var_pd(__m512d a, __m512i idx, __m512d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex2var_pd",
        "full_name": "__m512d _mm512_mask_permutex2var_pd(__m512d a, __mmask8 k, __m512i idx, __m512d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_pd",
        "full_name": "__m512d _mm512_maskz_permutex2var_pd(__mmask8 k, __m512d a, __m512i idx, __m512d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_pd",
        "full_name": "__m512d _mm512_mask2_permutex2var_pd(__m512d a, __m512i idx, __mmask8 k, __m512d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)"
    },
    {
        "name": "_mm_mask2_permutex2var_pd",
        "full_name": "__m128d _mm_mask2_permutex2var_pd(__m128d a, __m128i idx, __mmask8 k, __m128d b);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)"
    },
    {
        "name": "_mm_permutex2var_ps",
        "full_name": "__m128 _mm_permutex2var_ps(__m128 a, __m128i idx, __m128 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_ps",
        "full_name": "__m128 _mm_mask_permutex2var_ps(__m128 a, __mmask8 k, __m128i idx, __m128 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_ps",
        "full_name": "__m128 _mm_maskz_permutex2var_ps(__mmask8 k, __m128 a, __m128i idx, __m128 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask2_permutex2var_ps",
        "full_name": "__m128 _mm_mask2_permutex2var_ps(__m128 a, __m128i idx, __mmask8 k, __m128 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutex2var_ps",
        "full_name": "__m256 _mm256_permutex2var_ps(__m256 a, __m256i idx, __m256 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex2var_ps",
        "full_name": "__m256 _mm256_mask_permutex2var_ps(__m256 a, __mmask8 k, __m256i idx, __m256 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_ps",
        "full_name": "__m256 _mm256_maskz_permutex2var_ps(__mmask8 k, __m256 a, __m256i idx, __m256 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_ps",
        "full_name": "__m256 _mm256_mask2_permutex2var_ps(__m256 a, __m256i idx, __mmask8 k, __m256 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex2var_ps",
        "full_name": "__m512 _mm512_permutex2var_ps(__m512 a, __m512i idx, __m512 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex2var_ps",
        "full_name": "__m512 _mm512_mask_permutex2var_ps(__m512 a, __mmask16 k, __m512i idx, __m512 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_ps",
        "full_name": "__m512 _mm512_maskz_permutex2var_ps(__mmask16 k, __m512 a, __m512i idx, __m512 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_ps",
        "full_name": "__m512 _mm512_mask2_permutex2var_ps(__m512 a, __m512i idx, __mmask16 k, __m512 b);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutexvar_epi16",
        "full_name": "__m128i _mm_permutexvar_epi16(__m128i idx, __m128i a);",
        "description": "Shuffle 16-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutexvar_epi16",
        "full_name": "__m128i _mm_mask_permutexvar_epi16(__m128i src, __mmask8 k, __m128i idx, __m128i a);",
        "description": "Shuffle 16-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutexvar_epi16",
        "full_name": "__m128i _mm_maskz_permutexvar_epi16(__mmask8 k, __m128i idx, __m128i a);",
        "description": "Shuffle 16-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_epi16",
        "full_name": "__m256i _mm256_permutexvar_epi16(__m256i idx, __m256i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutexvar_epi16",
        "full_name": "__m256i _mm256_mask_permutexvar_epi16(__m256i src, __mmask16 k, __m256i idx, __m256i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_epi16",
        "full_name": "__m256i _mm256_maskz_permutexvar_epi16(__mmask16 k, __m256i idx, __m256i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutexvar_epi16",
        "full_name": "__m512i _mm512_permutexvar_epi16(__m512i idx, __m512i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutexvar_epi16",
        "full_name": "__m512i _mm512_mask_permutexvar_epi16(__m512i src, __mmask32 k, __m512i idx, __m512i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_epi16",
        "full_name": "__m512i _mm512_maskz_permutexvar_epi16(__mmask32 k, __m512i idx, __m512i a);",
        "description": "Shuffle 16-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_epi32",
        "full_name": "__m256i _mm256_permutexvar_epi32(__m256i idx, __m256i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutexvar_epi32",
        "full_name": "__m256i _mm256_mask_permutexvar_epi32(__m256i src, __mmask8 k, __m256i idx, __m256i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_epi32",
        "full_name": "__m256i _mm256_maskz_permutexvar_epi32(__mmask8 k, __m256i idx, __m256i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_permutexvar_epi32",
        "full_name": "__m512i _mm512_mask_permutexvar_epi32(__m512i src, __mmask16 k, __m512i idx, __m512i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_epi32",
        "full_name": "__m512i _mm512_maskz_permutexvar_epi32(__mmask16 k, __m512i idx, __m512i a);",
        "description": "Shuffle 32-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_epi64",
        "full_name": "__m256i _mm256_permutexvar_epi64(__m256i idx, __m256i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutexvar_epi64",
        "full_name": "__m256i _mm256_mask_permutexvar_epi64(__m256i src, __mmask8 k, __m256i idx, __m256i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_epi64",
        "full_name": "__m256i _mm256_maskz_permutexvar_epi64(__mmask8 k, __m256i idx, __m256i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_permutexvar_epi64",
        "full_name": "__m512i _mm512_mask_permutexvar_epi64(__m512i src, __mmask8 k, __m512i idx, __m512i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_epi64",
        "full_name": "__m512i _mm512_maskz_permutexvar_epi64(__mmask8 k, __m512i idx, __m512i a);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_pd",
        "full_name": "__m256d _mm256_permutexvar_pd(__m256i idx, __m256d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutexvar_pd",
        "full_name": "__m256d _mm256_mask_permutexvar_pd(__m256d src, __mmask8 k, __m256i idx, __m256d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_pd",
        "full_name": "__m256d _mm256_maskz_permutexvar_pd(__mmask8 k, __m256i idx, __m256d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutexvar_pd",
        "full_name": "__m512d _mm512_permutexvar_pd(__m512i idx, __m512d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutexvar_pd",
        "full_name": "__m512d _mm512_mask_permutexvar_pd(__m512d src, __mmask8 k, __m512i idx, __m512d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_pd",
        "full_name": "__m512d _mm512_maskz_permutexvar_pd(__mmask8 k, __m512i idx, __m512d a);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_ps",
        "full_name": "__m256 _mm256_permutexvar_ps(__m256i idx, __m256 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\"."
    },
    {
        "name": "_mm256_mask_permutexvar_ps",
        "full_name": "__m256 _mm256_mask_permutexvar_ps(__m256 src, __mmask8 k, __m256i idx, __m256 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_ps",
        "full_name": "__m256 _mm256_maskz_permutexvar_ps(__mmask8 k, __m256i idx, __m256 a);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_unpackhi_pi32",
        "full_name": "__m64 _mm_unpackhi_pi32(__m64 a, __m64 b);",
        "description": "Unpack and interleave 32-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpacklo_pi32",
        "full_name": "__m64 _mm_unpacklo_pi32(__m64 a, __m64 b);",
        "description": "Unpack and interleave 32-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_alignr_epi8",
        "full_name": "__m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm8);",
        "description": "Concatenate 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\"."
    },
    {
        "name": "_mm_mask_alignr_epi8",
        "full_name": "__m128i _mm_mask_alignr_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_alignr_epi8",
        "full_name": "__m128i _mm_maskz_alignr_epi8(__mmask16 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_alignr_epi8",
        "full_name": "__m256i _mm256_mask_alignr_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_alignr_epi8",
        "full_name": "__m256i _mm256_maskz_alignr_epi8(__mmask32 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_alignr_epi8",
        "full_name": "__m512i _mm512_alignr_epi8(__m512i a, __m512i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\"."
    },
    {
        "name": "_mm512_mask_alignr_epi8",
        "full_name": "__m512i _mm512_mask_alignr_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_alignr_epi8",
        "full_name": "__m512i _mm512_maskz_alignr_epi8(__mmask64 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate pairs of 16-byte blocks in \"a\" and \"b\" into a 32-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 16 bytes in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_alignr_pi8",
        "full_name": "__m64 _mm_alignr_pi8(__m64 a, __m64 b, int imm8);",
        "description": "Concatenate 8-byte blocks in \"a\" and \"b\" into a 16-byte temporary result, shift the result right by \"imm8\" bytes, and store the low 8 bytes in \"dst\"."
    },
    {
        "name": "_mm_and_si64",
        "full_name": "__m64 _mm_and_si64(__m64 a, __m64 b);",
        "description": "Compute the bitwise AND of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_andn_u32",
        "full_name": "unsigned int _andn_u32(unsigned int a, unsigned int b);",
        "description": "Compute the bitwise NOT of 32-bit integer \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_andn_u64",
        "full_name": "unsigned __int64 _andn_u64(unsigned __int64 a, unsigned __int64 b);",
        "description": "Compute the bitwise NOT of 64-bit integer \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_andnot_si64",
        "full_name": "__m64 _mm_andnot_si64(__m64 a, __m64 b);",
        "description": "Compute the bitwise NOT of 64 bits (representing integer data) in \"a\" and then AND with \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm512_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm512_cmpgt_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpgt_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epi8_mask",
        "full_name": "__mmask16 _mm_cmpgt_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmpgt_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epi8_mask",
        "full_name": "__mmask32 _mm256_cmpgt_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpgt_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epi8_mask",
        "full_name": "__mmask64 _mm512_cmpgt_epi8_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpgt_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epu16_mask",
        "full_name": "__mmask8 _mm_cmpgt_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epu16_mask",
        "full_name": "__mmask16 _mm256_cmpgt_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmpgt_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epu16_mask",
        "full_name": "__mmask32 _mm512_cmpgt_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmpgt_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epu32_mask",
        "full_name": "__mmask8 _mm_cmpgt_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epu32_mask",
        "full_name": "__mmask8 _mm256_cmpgt_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpgt_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epu32_mask",
        "full_name": "__mmask16 _mm512_cmpgt_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpgt_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm_cmpgt_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmpgt_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm256_cmpgt_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpgt_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm512_cmpgt_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpgt_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpgt_epu8_mask",
        "full_name": "__mmask16 _mm_cmpgt_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpgt_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmpgt_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpgt_epu8_mask",
        "full_name": "__mmask32 _mm256_cmpgt_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpgt_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmpgt_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpgt_epu8_mask",
        "full_name": "__mmask64 _mm512_cmpgt_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpgt_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpgt_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for greater-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epi16_mask",
        "full_name": "__mmask8 _mm_cmple_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epi16_mask",
        "full_name": "__mmask16 _mm256_cmple_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmple_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epi16_mask",
        "full_name": "__mmask32 _mm512_cmple_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmple_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epi32_mask",
        "full_name": "__mmask8 _mm_cmple_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epi32_mask",
        "full_name": "__mmask8 _mm256_cmple_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmple_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmple_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmple_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epi64_mask",
        "full_name": "__mmask8 _mm_cmple_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epi64_mask",
        "full_name": "__mmask8 _mm256_cmple_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmple_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epi64_mask",
        "full_name": "__mmask8 _mm512_cmple_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmple_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epi8_mask",
        "full_name": "__mmask16 _mm_cmple_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmple_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epi8_mask",
        "full_name": "__mmask32 _mm256_cmple_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmple_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmple_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmple_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epu16_mask",
        "full_name": "__mmask8 _mm_cmple_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epu16_mask",
        "full_name": "__mmask16 _mm256_cmple_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmple_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epu16_mask",
        "full_name": "__mmask32 _mm512_cmple_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmple_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epu32_mask",
        "full_name": "__mmask8 _mm_cmple_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epu32_mask",
        "full_name": "__mmask8 _mm256_cmple_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmple_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epu32_mask",
        "full_name": "__mmask16 _mm512_cmple_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmple_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epu64_mask",
        "full_name": "__mmask8 _mm_cmple_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmple_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epu64_mask",
        "full_name": "__mmask8 _mm256_cmple_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmple_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epu64_mask",
        "full_name": "__mmask8 _mm512_cmple_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmple_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmple_epu8_mask",
        "full_name": "__mmask16 _mm_cmple_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmple_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmple_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmple_epu8_mask",
        "full_name": "__mmask32 _mm256_cmple_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmple_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmple_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_epu8_mask",
        "full_name": "__mmask64 _mm512_cmple_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmple_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_pd_mask",
        "full_name": "__mmask8 _mm512_cmple_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmple_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmple_ps_mask",
        "full_name": "__mmask16 _mm512_cmple_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmple_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmple_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmplt_epi16_mask",
        "full_name": "__mmask8 _mm_cmplt_epi16_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmplt_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epi16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epi16_mask",
        "full_name": "__mmask16 _mm256_cmplt_epi16_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmplt_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmplt_epi16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmplt_epi16_mask",
        "full_name": "__mmask32 _mm512_cmplt_epi16_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmplt_epi16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmplt_epi32_mask",
        "full_name": "__mmask8 _mm_cmplt_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmplt_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epi32_mask",
        "full_name": "__mmask8 _mm256_cmplt_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmplt_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmplt_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmplt_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmplt_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm_cmplt_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_bswap",
        "full_name": "int _bswap(int a);",
        "description": "Reverse the byte order of 32-bit integer \"a\", and store the result in \"dst\". This intrinsic is provided for conversion between little and big endian values."
    },
    {
        "name": "_bswap64",
        "full_name": "__int64 _bswap64(__int64 a);",
        "description": "Reverse the byte order of 64-bit integer \"a\", and store the result in \"dst\". This intrinsic is provided for conversion between little and big endian values."
    },
    {
        "name": "_bzhi_u32",
        "full_name": "unsigned int _bzhi_u32(unsigned int a, unsigned int index);",
        "description": "Copy all bits from unsigned 32-bit integer \"a\" to \"dst\", and reset (set to 0) the high bits in \"dst\" starting at \"index\"."
    },
    {
        "name": "_bzhi_u64",
        "full_name": "unsigned __int64 _bzhi_u64(unsigned __int64 a, unsigned int index);",
        "description": "Copy all bits from unsigned 64-bit integer \"a\" to \"dst\", and reset (set to 0) the high bits in \"dst\" starting at \"index\"."
    },
    {
        "name": "_bittest",
        "full_name": "unsigned char _bittest(__int32 *a, __int32 b);",
        "description": "Return the bit at index \"b\" of 32-bit integer \"a\"."
    },
    {
        "name": "_bittest64",
        "full_name": "unsigned char _bittest64(__int64 *a, __int64 b);",
        "description": "Return the bit at index \"b\" of 64-bit integer \"a\"."
    },
    {
        "name": "_bittestandcomplement",
        "full_name": "unsigned char _bittestandcomplement(__int32 *a, __int32 b);",
        "description": "Return the bit at index \"b\" of 32-bit integer \"a\", and set that bit to its complement."
    },
    {
        "name": "_bittestandcomplement64",
        "full_name": "unsigned char _bittestandcomplement64(__int64 *a, __int64 b);",
        "description": "Return the bit at index \"b\" of 64-bit integer \"a\", and set that bit to its complement."
    },
    {
        "name": "_mm_mask_ternarylogic_epi32",
        "full_name": "__m128i _mm_mask_ternarylogic_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 32-bit granularity (32-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_ternarylogic_epi32",
        "full_name": "__m128i _mm_maskz_ternarylogic_epi32(__mmask8 k, __m128i a, __m128i b, __m128i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_ternarylogic_epi32",
        "full_name": "__m256i _mm256_mask_ternarylogic_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 32-bit granularity (32-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_ternarylogic_epi32",
        "full_name": "__m256i _mm256_maskz_ternarylogic_epi32(__mmask8 k, __m256i a, __m256i b, __m256i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_ternarylogic_epi32",
        "full_name": "__m512i _mm512_mask_ternarylogic_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 32-bit granularity (32-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_ternarylogic_epi32",
        "full_name": "__m512i _mm512_maskz_ternarylogic_epi32(__mmask16 k, __m512i a, __m512i b, __m512i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 32-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_ternarylogic_epi64",
        "full_name": "__m128i _mm_ternarylogic_epi64(__m128i a, __m128i b, __m128i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm256_ternarylogic_epi64",
        "full_name": "__m256i _mm256_ternarylogic_epi64(__m256i a, __m256i b, __m256i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm512_ternarylogic_epi64",
        "full_name": "__m512i _mm512_ternarylogic_epi64(__m512i a, __m512i b, __m512i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\"."
    },
    {
        "name": "_mm_mask_ternarylogic_epi64",
        "full_name": "__m128i _mm_mask_ternarylogic_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 64-bit granularity (64-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_ternarylogic_epi64",
        "full_name": "__m128i _mm_maskz_ternarylogic_epi64(__mmask8 k, __m128i a, __m128i b, __m128i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_ternarylogic_epi64",
        "full_name": "__m256i _mm256_mask_ternarylogic_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 64-bit granularity (64-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_ternarylogic_epi64",
        "full_name": "__m256i _mm256_maskz_ternarylogic_epi64(__mmask8 k, __m256i a, __m256i b, __m256i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_ternarylogic_epi64",
        "full_name": "__m512i _mm512_mask_ternarylogic_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"src\", \"a\", and \"b\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using writemask \"k\" at 64-bit granularity (64-bit elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_ternarylogic_epi64",
        "full_name": "__m512i _mm512_maskz_ternarylogic_epi64(__mmask8 k, __m512i a, __m512i b, __m512i c, int imm8);",
        "description": "Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in \"imm8\". For each bit in each packed 64-bit integer, the corresponding bit from \"a\", \"b\", and \"c\" are used according to \"imm8\", and the result is written to the corresponding bit in \"dst\" using zeromask \"k\" at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi16_epi8",
        "full_name": "__m128i _mm_cvtusepi16_epi8(__m128i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi16_epi8",
        "full_name": "__m128i _mm_mask_cvtusepi16_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi16_epi8",
        "full_name": "__m128i _mm_maskz_cvtusepi16_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi16_epi8",
        "full_name": "__m128i _mm256_cvtusepi16_epi8(__m256i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi16_epi8",
        "full_name": "__m128i _mm256_mask_cvtusepi16_epi8(__m128i src, __mmask16 k, __m256i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi16_epi8",
        "full_name": "__m128i _mm256_maskz_cvtusepi16_epi8(__mmask16 k, __m256i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi16_epi8",
        "full_name": "__m256i _mm512_cvtusepi16_epi8(__m512i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi16_epi8",
        "full_name": "__m256i _mm512_mask_cvtusepi16_epi8(__m256i src, __mmask32 k, __m512i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi16_epi8",
        "full_name": "__m256i _mm512_maskz_cvtusepi16_epi8(__mmask32 k, __m512i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi32_epi16",
        "full_name": "__m128i _mm_cvtusepi32_epi16(__m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi32_epi16",
        "full_name": "__m128i _mm_mask_cvtusepi32_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi32_epi16",
        "full_name": "__m128i _mm_maskz_cvtusepi32_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi32_epi16",
        "full_name": "__m128i _mm256_cvtusepi32_epi16(__m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi32_epi16",
        "full_name": "__m128i _mm256_mask_cvtusepi32_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi32_epi16",
        "full_name": "__m128i _mm256_maskz_cvtusepi32_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi32_epi16",
        "full_name": "__m256i _mm512_cvtusepi32_epi16(__m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi32_epi16",
        "full_name": "__m256i _mm512_mask_cvtusepi32_epi16(__m256i src, __mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi32_epi16",
        "full_name": "__m256i _mm512_maskz_cvtusepi32_epi16(__mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi32_epi8",
        "full_name": "__m128i _mm_cvtusepi32_epi8(__m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi32_epi8",
        "full_name": "__m128i _mm_mask_cvtusepi32_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi32_epi8",
        "full_name": "__m128i _mm_maskz_cvtusepi32_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi32_epi8",
        "full_name": "__m128i _mm256_cvtusepi32_epi8(__m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi32_epi8",
        "full_name": "__m128i _mm256_mask_cvtusepi32_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi32_epi8",
        "full_name": "__m128i _mm256_maskz_cvtusepi32_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi32_epi8",
        "full_name": "__m128i _mm512_cvtusepi32_epi8(__m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi32_epi8",
        "full_name": "__m128i _mm512_mask_cvtusepi32_epi8(__m128i src, __mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi32_epi8",
        "full_name": "__m128i _mm512_maskz_cvtusepi32_epi8(__mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi64_epi32",
        "full_name": "__m128i _mm_cvtusepi64_epi32(__m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_epi32",
        "full_name": "__m128i _mm_mask_cvtusepi64_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi64_epi32",
        "full_name": "__m128i _mm_maskz_cvtusepi64_epi32(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi64_epi32",
        "full_name": "__m128i _mm256_cvtusepi64_epi32(__m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_epi32",
        "full_name": "__m128i _mm256_mask_cvtusepi64_epi32(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi64_epi32",
        "full_name": "__m128i _mm256_maskz_cvtusepi64_epi32(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi64_epi32",
        "full_name": "__m256i _mm512_cvtusepi64_epi32(__m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_epi32",
        "full_name": "__m256i _mm512_mask_cvtusepi64_epi32(__m256i src, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi64_epi32",
        "full_name": "__m256i _mm512_maskz_cvtusepi64_epi32(__mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi64_epi16",
        "full_name": "__m128i _mm_cvtusepi64_epi16(__m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_epi16",
        "full_name": "__m128i _mm_mask_cvtusepi64_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi64_epi16",
        "full_name": "__m128i _mm_maskz_cvtusepi64_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi64_epi16",
        "full_name": "__m128i _mm256_cvtusepi64_epi16(__m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_epi16",
        "full_name": "__m128i _mm256_mask_cvtusepi64_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi64_epi16",
        "full_name": "__m128i _mm256_maskz_cvtusepi64_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi64_epi16",
        "full_name": "__m128i _mm512_cvtusepi64_epi16(__m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_epi16",
        "full_name": "__m128i _mm512_mask_cvtusepi64_epi16(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi64_epi16",
        "full_name": "__m128i _mm512_maskz_cvtusepi64_epi16(__mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtusepi64_epi8",
        "full_name": "__m128i _mm_cvtusepi64_epi8(__m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_epi8",
        "full_name": "__m128i _mm_mask_cvtusepi64_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtusepi64_epi8",
        "full_name": "__m128i _mm_maskz_cvtusepi64_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtusepi64_epi8",
        "full_name": "__m128i _mm256_cvtusepi64_epi8(__m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_epi8",
        "full_name": "__m128i _mm256_mask_cvtusepi64_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtusepi64_epi8",
        "full_name": "__m128i _mm256_maskz_cvtusepi64_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtusepi64_epi8",
        "full_name": "__m128i _mm512_cvtusepi64_epi8(__m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_epi8",
        "full_name": "__m128i _mm512_mask_cvtusepi64_epi8(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtusepi64_epi8",
        "full_name": "__m128i _mm512_maskz_cvtusepi64_epi8(__mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_packus_epi16",
        "full_name": "__m128i _mm_packus_epi16(__m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_packus_epi16",
        "full_name": "__m128i _mm_mask_packus_epi16(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_packus_epi16",
        "full_name": "__m128i _mm_maskz_packus_epi16(__mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_packus_epi16",
        "full_name": "__m256i _mm256_packus_epi16(__m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_packus_epi16",
        "full_name": "__m256i _mm256_mask_packus_epi16(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_packus_epi16",
        "full_name": "__m256i _mm256_maskz_packus_epi16(__mmask32 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_packus_epi16",
        "full_name": "__m512i _mm512_packus_epi16(__m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_packus_epi16",
        "full_name": "__m512i _mm512_mask_packus_epi16(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_packus_epi16",
        "full_name": "__m512i _mm512_maskz_packus_epi16(__mmask64 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_packus_epi32",
        "full_name": "__m128i _mm_packus_epi32(__m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_packus_epi32",
        "full_name": "__m128i _mm_mask_packus_epi32(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_packus_epi32",
        "full_name": "__m128i _mm_maskz_packus_epi32(__mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_packus_epi32",
        "full_name": "__m256i _mm256_packus_epi32(__m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_packus_epi32",
        "full_name": "__m256i _mm256_mask_packus_epi32(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_packus_epi32",
        "full_name": "__m256i _mm256_maskz_packus_epi32(__mmask16 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_packus_epi32",
        "full_name": "__m512i _mm512_packus_epi32(__m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_packus_epi32",
        "full_name": "__m512i _mm512_mask_packus_epi32(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_packus_epi32",
        "full_name": "__m512i _mm512_maskz_packus_epi32(__mmask32 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using unsigned saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_invcbrt_pd",
        "full_name": "__m128d _mm_invcbrt_pd(__m128d a);",
        "description": "Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_invcbrt_pd",
        "full_name": "__m256d _mm256_invcbrt_pd(__m256d a);",
        "description": "Compute the inverse cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_invcbrt_ps",
        "full_name": "__m128 _mm_invcbrt_ps(__m128 a);",
        "description": "Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_invcbrt_ps",
        "full_name": "__m256 _mm256_invcbrt_ps(__m256 a);",
        "description": "Compute the inverse cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_invsqrt_pd",
        "full_name": "__m128d _mm_invsqrt_pd(__m128d a);",
        "description": "Compute the inverse square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_invsqrt_pd",
        "full_name": "__m256d _mm256_invsqrt_pd(__m256d a);",
        "description": "Compute the inverse square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_invsqrt_pd",
        "full_name": "__m512d _mm512_invsqrt_pd(__m512d a);",
        "description": "Compute the inverse square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_invsqrt_pd",
        "full_name": "__m512d _mm512_mask_invsqrt_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_invsqrt_ps",
        "full_name": "__m128 _mm_invsqrt_ps(__m128 a);",
        "description": "Compute the inverse square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_invsqrt_ps",
        "full_name": "__m256 _mm256_invsqrt_ps(__m256 a);",
        "description": "Compute the inverse square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_invsqrt_ps",
        "full_name": "__m512 _mm512_invsqrt_ps(__m512 a);",
        "description": "Compute the inverse square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_invsqrt_ps",
        "full_name": "__m512 _mm512_mask_invsqrt_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtsepi16_storeu_epi8",
        "full_name": "void _mm_mask_cvtsepi16_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi16_storeu_epi8",
        "full_name": "void _mm256_mask_cvtsepi16_storeu_epi8(void *base_addr, __mmask16 k, __m256i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi16_storeu_epi8",
        "full_name": "void _mm512_mask_cvtsepi16_storeu_epi8(void *base_addr, __mmask32 k, __m512i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtepu16_epi64",
        "full_name": "__m128i _mm_mask_cvtepu16_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu16_epi64",
        "full_name": "__m128i _mm_maskz_cvtepu16_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in the low 4 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu16_epi64",
        "full_name": "__m256i _mm256_cvtepu16_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepu16_epi64",
        "full_name": "__m256i _mm256_mask_cvtepu16_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu16_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepu16_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in the low 8 bytes of \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu16_epi64",
        "full_name": "__m512i _mm512_cvtepu16_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu16_epi64",
        "full_name": "__m512i _mm512_mask_cvtepu16_epi64(__m512i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu16_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepu16_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 16-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtepu32_epi64",
        "full_name": "__m128i _mm_mask_cvtepu32_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu32_epi64",
        "full_name": "__m128i _mm_maskz_cvtepu32_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu32_epi64",
        "full_name": "__m256i _mm256_cvtepu32_epi64(__m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepu32_epi64",
        "full_name": "__m256i _mm256_mask_cvtepu32_epi64(__m256i src, __mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu32_epi64",
        "full_name": "__m256i _mm256_maskz_cvtepu32_epi64(__mmask8 k, __m128i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu32_epi64",
        "full_name": "__m512i _mm512_cvtepu32_epi64(__m256i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu32_epi64",
        "full_name": "__m512i _mm512_mask_cvtepu32_epi64(__m512i src, __mmask8 k, __m256i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu32_epi64",
        "full_name": "__m512i _mm512_maskz_cvtepu32_epi64(__mmask8 k, __m256i a);",
        "description": "Zero extend packed unsigned 32-bit integers in \"a\" to packed 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtepu32_pd",
        "full_name": "__m128d _mm_cvtepu32_pd(__m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtepu32_pd",
        "full_name": "__m128d _mm_mask_cvtepu32_pd(__m128d src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtepu32_pd",
        "full_name": "__m128d _mm_maskz_cvtepu32_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtepu32_pd",
        "full_name": "__m256d _mm256_cvtepu32_pd(__m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtepu32_pd",
        "full_name": "__m256d _mm256_mask_cvtepu32_pd(__m256d src, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtepu32_pd",
        "full_name": "__m256d _mm256_maskz_cvtepu32_pd(__mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu32_pd",
        "full_name": "__m512d _mm512_cvtepu32_pd(__m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu32_pd",
        "full_name": "__m512d _mm512_mask_cvtepu32_pd(__m512d src, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtepu32_pd",
        "full_name": "__m512d _mm512_maskz_cvtepu32_pd(__mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtepu32_ps",
        "full_name": "__m512 _mm512_cvtepu32_ps(__m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtepu32_ps",
        "full_name": "__m512 _mm512_mask_cvtepu32_ps(__m512 src, __mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtsd_ss",
        "full_name": "__m128 _mm_mask_cvtsd_ss(__m128 src, __mmask8 k, __m128 a, __m128d b);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_cvtsd_ss",
        "full_name": "__m128 _mm_maskz_cvtsd_ss(__mmask8 k, __m128 a, __m128d b);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cvtsd_u32",
        "full_name": "unsigned int _mm_cvtsd_u32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_u64",
        "full_name": "unsigned __int64 _mm_cvtsd_u64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtsepi16_epi8",
        "full_name": "__m128i _mm_cvtsepi16_epi8(__m128i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi16_epi8",
        "full_name": "__m128i _mm_mask_cvtsepi16_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi16_epi8",
        "full_name": "__m128i _mm_maskz_cvtsepi16_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi16_epi8",
        "full_name": "__m128i _mm256_cvtsepi16_epi8(__m256i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi16_epi8",
        "full_name": "__m128i _mm256_mask_cvtsepi16_epi8(__m128i src, __mmask16 k, __m256i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi16_epi8",
        "full_name": "__m128i _mm256_maskz_cvtsepi16_epi8(__mmask16 k, __m256i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtsepi16_epi8",
        "full_name": "__m256i _mm512_cvtsepi16_epi8(__m512i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi16_epi8",
        "full_name": "__m256i _mm512_mask_cvtsepi16_epi8(__m256i src, __mmask32 k, __m512i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi16_epi8",
        "full_name": "__m256i _mm512_maskz_cvtsepi16_epi8(__mmask32 k, __m512i a);",
        "description": "Convert packed signed 16-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtsepi32_epi16",
        "full_name": "__m128i _mm_cvtsepi32_epi16(__m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi32_epi16",
        "full_name": "__m128i _mm_mask_cvtsepi32_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi32_epi16",
        "full_name": "__m128i _mm_maskz_cvtsepi32_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi32_epi16",
        "full_name": "__m128i _mm256_cvtsepi32_epi16(__m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi32_epi16",
        "full_name": "__m128i _mm256_mask_cvtsepi32_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi32_epi16",
        "full_name": "__m128i _mm256_maskz_cvtsepi32_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtsepi32_epi16",
        "full_name": "__m256i _mm512_cvtsepi32_epi16(__m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi32_epi16",
        "full_name": "__m256i _mm512_mask_cvtsepi32_epi16(__m256i src, __mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi32_epi16",
        "full_name": "__m256i _mm512_maskz_cvtsepi32_epi16(__mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtsepi32_epi8",
        "full_name": "__m128i _mm_cvtsepi32_epi8(__m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi32_epi8",
        "full_name": "__m128i _mm_mask_cvtsepi32_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi32_epi8",
        "full_name": "__m128i _mm_maskz_cvtsepi32_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi32_epi8",
        "full_name": "__m128i _mm256_cvtsepi32_epi8(__m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi32_epi8",
        "full_name": "__m128i _mm256_mask_cvtsepi32_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi32_epi8",
        "full_name": "__m128i _mm256_maskz_cvtsepi32_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_max_round_pd",
        "full_name": "__m512d _mm512_max_round_pd(__m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\".  [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_mask_max_round_pd",
        "full_name": "__m512d _mm512_mask_max_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).   [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_maskz_max_round_pd",
        "full_name": "__m512d _mm512_maskz_max_round_pd(__mmask8 k, __m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_max_round_ps",
        "full_name": "__m512 _mm512_max_round_ps(__m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\".  [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_mask_max_round_ps",
        "full_name": "__m512 _mm512_mask_max_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).   [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_maskz_max_round_ps",
        "full_name": "__m512 _mm512_maskz_max_round_ps(__mmask16 k, __m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note][max_float_note]"
    },
    {
        "name": "_mm_mask_max_round_sd",
        "full_name": "__m128d _mm_mask_max_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm_maskz_max_round_sd",
        "full_name": "__m128d _mm_maskz_max_round_sd(__mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm_max_round_sd",
        "full_name": "__m128d _mm_max_round_sd(__m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm_mask_max_round_ss",
        "full_name": "__m128 _mm_mask_max_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm_maskz_max_round_ss",
        "full_name": "__m128 _mm_maskz_max_round_ss(__mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm_max_round_ss",
        "full_name": "__m128 _mm_max_round_ss(__m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the maximum value in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][max_float_note]"
    },
    {
        "name": "_mm512_min_round_pd",
        "full_name": "__m512d _mm512_min_round_pd(__m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\".  [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_mask_min_round_pd",
        "full_name": "__m512d _mm512_mask_min_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).   [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_maskz_min_round_pd",
        "full_name": "__m512d _mm512_maskz_min_round_pd(__mmask8 k, __m512d a, __m512d b, int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_min_round_ps",
        "full_name": "__m512 _mm512_min_round_ps(__m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\".  [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_mask_min_round_ps",
        "full_name": "__m512 _mm512_mask_min_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).   [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_maskz_min_round_ps",
        "full_name": "__m512 _mm512_maskz_min_round_ps(__mmask16 k, __m512 a, __m512 b, int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).  [sae_note][min_float_note]"
    },
    {
        "name": "_mm_mask_min_round_sd",
        "full_name": "__m128d _mm_mask_min_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm_maskz_min_round_sd",
        "full_name": "__m128d _mm_maskz_min_round_sd(__mmask8 k, __m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm_min_round_sd",
        "full_name": "__m128d _mm_min_round_sd(__m128d a, __m128d b, int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" , and copy the upper element from \"a\" to the upper element of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm_mask_min_round_ss",
        "full_name": "__m128 _mm_mask_min_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm_maskz_min_round_ss",
        "full_name": "__m128 _mm_maskz_min_round_ss(__mmask8 k, __m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm_min_round_ss",
        "full_name": "__m128 _mm_min_round_ss(__m128 a, __m128 b, int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", store the minimum value in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [sae_note][min_float_note]"
    },
    {
        "name": "_mm512_maskz_div_round_ps",
        "full_name": "__m512 _mm512_maskz_div_round_ps(__mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Divide packed single-precision (32-bit) floating-point elements in \"a\" by packed elements in \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_fmaddsub_round_pd",
        "full_name": "__m512d _mm512_fmaddsub_round_pd(__m512d a, __m512d b, __m512d c, const int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmaddsub_round_pd",
        "full_name": "__m512d _mm512_mask_fmaddsub_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, const int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fmaddsub_round_pd",
        "full_name": "__m512d _mm512_mask3_fmaddsub_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, const int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmaddsub_round_pd",
        "full_name": "__m512d _mm512_maskz_fmaddsub_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, const int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmaddsub_round_ps",
        "full_name": "__m512 _mm512_fmaddsub_round_ps(__m512 a, __m512 b, __m512 c, const int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmaddsub_round_ps",
        "full_name": "__m512 _mm512_mask_fmaddsub_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, const int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fmaddsub_round_ps",
        "full_name": "__m512 _mm512_mask3_fmaddsub_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, const int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmaddsub_round_ps",
        "full_name": "__m512 _mm512_maskz_fmaddsub_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, const int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternatively add and subtract packed elements in \"c\" to/from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm_broadcastmb_epi64",
        "full_name": "__m128i _mm_broadcastmb_epi64(__mmask8 k);",
        "description": "Broadcast the low 8-bits from input mask \"k\" to all 64-bit elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcastmb_epi64",
        "full_name": "__m256i _mm256_broadcastmb_epi64(__mmask8 k);",
        "description": "Broadcast the low 8-bits from input mask \"k\" to all 64-bit elements of \"dst\"."
    },
    {
        "name": "_mm512_broadcastmb_epi64",
        "full_name": "__m512i _mm512_broadcastmb_epi64(__mmask8 k);",
        "description": "Broadcast the low 8-bits from input mask \"k\" to all 64-bit elements of \"dst\"."
    },
    {
        "name": "_mm_broadcastmw_epi32",
        "full_name": "__m128i _mm_broadcastmw_epi32(__mmask16 k);",
        "description": "Broadcast the low 16-bits from input mask \"k\" to all 32-bit elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcastmw_epi32",
        "full_name": "__m256i _mm256_broadcastmw_epi32(__mmask16 k);",
        "description": "Broadcast the low 16-bits from input mask \"k\" to all 32-bit elements of \"dst\"."
    },
    {
        "name": "_mm512_broadcastmw_epi32",
        "full_name": "__m512i _mm512_broadcastmw_epi32(__mmask16 k);",
        "description": "Broadcast the low 16-bits from input mask \"k\" to all 32-bit elements of \"dst\"."
    },
    {
        "name": "_mm_cbrt_pd",
        "full_name": "__m128d _mm_cbrt_pd(__m128d a);",
        "description": "Compute the cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cbrt_pd",
        "full_name": "__m256d _mm256_cbrt_pd(__m256d a);",
        "description": "Compute the cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cbrt_pd",
        "full_name": "__m512d _mm512_cbrt_pd(__m512d a);",
        "description": "Compute the cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cbrt_pd",
        "full_name": "__m512d _mm512_mask_cbrt_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the cube root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cbrt_ps",
        "full_name": "__m128 _mm_cbrt_ps(__m128 a);",
        "description": "Compute the cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cbrt_ps",
        "full_name": "__m256 _mm256_cbrt_ps(__m256 a);",
        "description": "Compute the cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cbrt_ps",
        "full_name": "__m512 _mm512_cbrt_ps(__m512 a);",
        "description": "Compute the cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cbrt_ps",
        "full_name": "__m512 _mm512_mask_cbrt_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the cube root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_i32scatter_epi32",
        "full_name": "void _mm_i32scatter_epi32(void *base_addr, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32scatter_epi32",
        "full_name": "void _mm256_i32scatter_epi32(void *base_addr, __m256i vindex, __m256i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32scatter_epi32",
        "full_name": "void _mm512_i32scatter_epi32(void *base_addr, __m512i vindex, __m512i a, int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i32scatter_epi64",
        "full_name": "void _mm_i32scatter_epi64(void *base_addr, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32scatter_epi64",
        "full_name": "void _mm256_i32scatter_epi64(void *base_addr, __m128i vindex, __m256i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32scatter_epi64",
        "full_name": "void _mm512_i32scatter_epi64(void *base_addr, __m256i vindex, __m512i a, int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32scatter_epi64",
        "full_name": "void _mm_mask_i32scatter_epi64(void *base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32scatter_epi64",
        "full_name": "void _mm256_mask_i32scatter_epi64(void *base_addr, __mmask8 k, __m128i vindex, __m256i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32scatter_epi64",
        "full_name": "void _mm512_mask_i32scatter_epi64(void *base_addr, __mmask8 k, __m256i vindex, __m512i a, int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i32gather_epi64",
        "full_name": "__m128i _mm_i32gather_epi64(__int64 const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32gather_epi64",
        "full_name": "__m256i _mm256_i32gather_epi64(__int64 const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32gather_epi64",
        "full_name": "__m512i _mm512_i32gather_epi64(__m256i vindex, void const * base_addr, int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i32gather_epi64",
        "full_name": "__m128i _mm_mmask_i32gather_epi64(__m128i src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i32gather_epi64",
        "full_name": "__m256i _mm256_mmask_i32gather_epi64(__m256i src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32gather_epi64",
        "full_name": "__m512i _mm512_mask_i32gather_epi64(__m512i src, __mmask8 k, __m256i vindex, void const * base_addr, int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32gather_epi64",
        "full_name": "__m128i _mm_mask_i32gather_epi64(__m128i src, __int64 const * base_addr, __m128i vindex, __m128i mask, const int scale);",
        "description": "Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i32gather_pd",
        "full_name": "__m128d _mm_i32gather_pd(double const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32gather_pd",
        "full_name": "__m256d _mm256_i32gather_pd(double const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32gather_pd",
        "full_name": "__m512d _mm512_i32gather_pd(__m256i vindex, void const * base_addr, int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i32gather_pd",
        "full_name": "__m128d _mm_mmask_i32gather_pd(__m128d src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i32gather_pd",
        "full_name": "__m256d _mm256_mmask_i32gather_pd(__m256d src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32gather_pd",
        "full_name": "__m512d _mm512_mask_i32gather_pd(__m512d src, __mmask8 k, __m256i vindex, void const * base_addr, int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32gather_pd",
        "full_name": "__m128d _mm_mask_i32gather_pd(__m128d src, double const * base_addr, __m128i vindex, __m128d mask, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32gather_pd",
        "full_name": "__m256d _mm256_mask_i32gather_pd(__m256d src, double const * base_addr, __m128i vindex, __m256d mask, const int scale);",
        "description": "Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i32gather_ps",
        "full_name": "__m128 _mm_i32gather_ps(float const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32gather_ps",
        "full_name": "__m256 _mm256_i32gather_ps(float const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32gather_ps",
        "full_name": "__m512 _mm512_i32gather_ps(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i32gather_ps",
        "full_name": "__m128 _mm_mmask_i32gather_ps(__m128 src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i32gather_ps",
        "full_name": "__m256 _mm256_mmask_i32gather_ps(__m256 src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32gather_ps",
        "full_name": "__m512 _mm512_mask_i32gather_ps(__m512 src, __mmask16 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32gather_ps",
        "full_name": "__m128 _mm_mask_i32gather_ps(__m128 src, float const * base_addr, __m128i vindex, __m128 mask, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32gather_ps",
        "full_name": "__m256 _mm256_mask_i32gather_ps(__m256 src, float const * base_addr, __m256i vindex, __m256 mask, const int scale);",
        "description": "Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_hypot_pd",
        "full_name": "__m128d _mm_hypot_pd(__m128d a, __m128d b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_hypot_pd",
        "full_name": "__m256d _mm256_hypot_pd(__m256d a, __m256d b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_hypot_pd",
        "full_name": "__m512d _mm512_hypot_pd(__m512d a, __m512d b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_hypot_pd",
        "full_name": "__m512d _mm512_mask_hypot_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_hypot_ps",
        "full_name": "__m128 _mm_hypot_ps(__m128 a, __m128 b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_hypot_ps",
        "full_name": "__m256 _mm256_hypot_ps(__m256 a, __m256 b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_hypot_ps",
        "full_name": "__m512 _mm512_hypot_ps(__m512 a, __m512 b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_hypot_ps",
        "full_name": "__m512 _mm512_mask_hypot_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the length of the hypotenuse of a right triangle, with the lengths of the other two sides of the triangle stored as packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_andnot_ps",
        "full_name": "__m128 _mm_andnot_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_andnot_ps",
        "full_name": "__m128 _mm_mask_andnot_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_andnot_ps",
        "full_name": "__m128 _mm_maskz_andnot_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_andnot_ps",
        "full_name": "__m256 _mm256_andnot_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_andnot_ps",
        "full_name": "__m256 _mm256_mask_andnot_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_andnot_ps",
        "full_name": "__m256 _mm256_maskz_andnot_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_andnot_ps",
        "full_name": "__m512 _mm512_andnot_ps(__m512 a, __m512 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_andnot_ps",
        "full_name": "__m512 _mm512_mask_andnot_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_andnot_ps",
        "full_name": "__m512 _mm512_maskz_andnot_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the bitwise NOT of packed single-precision (32-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_andnot_pd",
        "full_name": "__m128d _mm_andnot_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_andnot_pd",
        "full_name": "__m128d _mm_mask_andnot_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_andnot_pd",
        "full_name": "__m128d _mm_maskz_andnot_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_andnot_pd",
        "full_name": "__m256d _mm256_andnot_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_andnot_pd",
        "full_name": "__m256d _mm256_mask_andnot_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_andnot_pd",
        "full_name": "__m256d _mm256_maskz_andnot_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_andnot_pd",
        "full_name": "__m512d _mm512_andnot_pd(__m512d a, __m512d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_andnot_pd",
        "full_name": "__m512d _mm512_mask_andnot_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_andnot_pd",
        "full_name": "__m512d _mm512_maskz_andnot_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the bitwise NOT of packed double-precision (64-bit) floating-point elements in \"a\" and then AND with \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtps_epu64",
        "full_name": "__m128i _mm_cvtps_epu64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtps_epu64",
        "full_name": "__m128i _mm_mask_cvtps_epu64(__m128i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtps_epu64",
        "full_name": "__m128i _mm_maskz_cvtps_epu64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtps_epu64",
        "full_name": "__m256i _mm256_cvtps_epu64(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtps_epu64",
        "full_name": "__m256i _mm256_mask_cvtps_epu64(__m256i src, __mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtps_epu64",
        "full_name": "__m256i _mm256_maskz_cvtps_epu64(__mmask8 k, __m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtps_epu64",
        "full_name": "__m512i _mm512_cvtps_epu64(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtps_epu64",
        "full_name": "__m512i _mm512_mask_cvtps_epu64(__m512i src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtps_epu64",
        "full_name": "__m512i _mm512_maskz_cvtps_epu64(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed unsigned 64-bit integers, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtps_pd",
        "full_name": "__m128d _mm_cvtps_pd(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cvtps_pd",
        "full_name": "__m256d _mm256_cvtps_pd(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cvtps_pd",
        "full_name": "__m512d _mm512_cvtps_pd(__m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtps_pd",
        "full_name": "__m512d _mm512_mask_cvtps_pd(__m512d src, __mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtps_pd",
        "full_name": "__m512d _mm512_maskz_cvtps_pd(__mmask8 k, __m256 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed double-precision (64-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtpu16_ps",
        "full_name": "__m128 _mm_cvtpu16_ps(__m64 a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtpu8_ps",
        "full_name": "__m128 _mm_cvtpu8_ps(__m64 a);",
        "description": "Convert the lower packed unsigned 8-bit integers in \"a\" to packed single-precision (32-bit) floating-point elements, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_i64",
        "full_name": "__int64 _mm_cvtsd_i64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm256_floor_pd",
        "full_name": "__m256d _mm256_floor_pd(__m256d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_floor_pd",
        "full_name": "__m512d _mm512_floor_pd(__m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_floor_pd",
        "full_name": "__m512d _mm512_mask_floor_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_floor_ps",
        "full_name": "__m256 _mm256_floor_ps(__m256 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_floor_ps",
        "full_name": "__m512 _mm512_floor_ps(__m512 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_floor_ps",
        "full_name": "__m512 _mm512_mask_floor_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cosh_ps",
        "full_name": "__m128 _mm_cosh_ps(__m128 a);",
        "description": "Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cosh_ps",
        "full_name": "__m256 _mm256_cosh_ps(__m256 a);",
        "description": "Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cosh_ps",
        "full_name": "__m512 _mm512_cosh_ps(__m512 a);",
        "description": "Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cosh_ps",
        "full_name": "__m512 _mm512_mask_cosh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the hyperbolic cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cosh_pd",
        "full_name": "__m128d _mm_cosh_pd(__m128d a);",
        "description": "Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cosh_pd",
        "full_name": "__m256d _mm256_cosh_pd(__m256d a);",
        "description": "Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cosh_pd",
        "full_name": "__m512d _mm512_cosh_pd(__m512d a);",
        "description": "Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cosh_pd",
        "full_name": "__m512d _mm512_mask_cosh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the hyperbolic cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtpslo_pd",
        "full_name": "__m512d _mm512_cvtpslo_pd(__m512 v2);",
        "description": "Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtpslo_pd",
        "full_name": "__m512d _mm512_mask_cvtpslo_pd(__m512d src, __mmask8 k, __m512 v2);",
        "description": "Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in \"v2\" to packed double-precision (64-bit) floating-point elements, storing the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_dp_pd",
        "full_name": "__m128d _mm_dp_pd(__m128d a, __m128d b, const int imm8);",
        "description": "Conditionally multiply the packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using bits 5:4 of \"imm8\", sum the two products, and conditionally store the sum in \"dst\" using bits 1:0 of \"imm8\"."
    },
    {
        "name": "_mm_dp_ps",
        "full_name": "__m128 _mm_dp_ps(__m128 a, __m128 b, const int imm8);",
        "description": "Conditionally multiply the packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the high 4 bits in \"imm8\", sum the four products, and conditionally store the sum in \"dst\" using the low 4 bits of \"imm8\"."
    },
    {
        "name": "_mm256_dp_ps",
        "full_name": "__m256 _mm256_dp_ps(__m256 a, __m256 b, const int imm8);",
        "description": "Conditionally multiply the packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the high 4 bits in \"imm8\", sum the four products within each 128-bit lane, and conditionally store the sum in \"dst\" using the low 4 bits of \"imm8\"."
    },
    {
        "name": "_mm_cvtps_ph",
        "full_name": "__m128i _mm_cvtps_ph(__m128 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_cvtps_ph",
        "full_name": "__m128i _mm_mask_cvtps_ph(__m128i src, __mmask8 k, __m128 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm_maskz_cvtps_ph",
        "full_name": "__m128i _mm_maskz_cvtps_ph(__mmask8 k, __m128 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_cvtps_ph",
        "full_name": "__m128i _mm256_cvtps_ph(__m256 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm256_mask_cvtps_ph",
        "full_name": "__m128i _mm256_mask_cvtps_ph(__m128i src, __mmask8 k, __m256 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm256_maskz_cvtps_ph",
        "full_name": "__m128i _mm256_maskz_cvtps_ph(__mmask8 k, __m256 a, int imm8);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_imm_note]"
    },
    {
        "name": "_mm512_cvtps_ph",
        "full_name": "__m256i _mm512_cvtps_ph(__m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\". [round2_note]"
    },
    {
        "name": "_mm512_mask_cvtps_ph",
        "full_name": "__m256i _mm512_mask_cvtps_ph(__m256i src, __mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). [round2_note]"
    },
    {
        "name": "_mm512_maskz_cvtps_ph",
        "full_name": "__m256i _mm512_maskz_cvtps_ph(__mmask16 k, __m512 a, int sae);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed half-precision (16-bit) floating-point elements, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round2_note]"
    },
    {
        "name": "_mm_div_epi32",
        "full_name": "__m128i _mm_div_epi32(__m128i a, __m128i b);",
        "description": "Divide packed signed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_mask_div_epi32",
        "full_name": "__m512i _mm512_mask_div_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Divide packed signed 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_div_epi64",
        "full_name": "__m128i _mm_div_epi64(__m128i a, __m128i b);",
        "description": "Divide packed signed 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_div_epu32",
        "full_name": "__m128i _mm_div_epu32(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm512_mask_div_epu32",
        "full_name": "__m512i _mm512_mask_div_epu32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_div_epu64",
        "full_name": "__m128i _mm_div_epu64(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 64-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_shuffle_pi8",
        "full_name": "__m64 _mm_shuffle_pi8(__m64 a, __m64 b);",
        "description": "Shuffle packed 8-bit integers in \"a\" according to shuffle control mask in the corresponding 8-bit element of \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_kxor_mask8",
        "full_name": "__mmask8 _kxor_mask8(__mmask8 a, __mmask8 b);",
        "description": "Compute the bitwise XOR of 8-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxor_mask16",
        "full_name": "__mmask16 _kxor_mask16(__mmask16 a, __mmask16 b);",
        "description": "Compute the bitwise XOR of 16-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxor_mask32",
        "full_name": "__mmask32 _kxor_mask32(__mmask32 a, __mmask32 b);",
        "description": "Compute the bitwise XOR of 32-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_kxor_mask64",
        "full_name": "__mmask64 _kxor_mask64(__mmask64 a, __mmask64 b);",
        "description": "Compute the bitwise XOR of 64-bit masks \"a\" and \"b\", and store the result in \"k\"."
    },
    {
        "name": "_lzcnt_u32",
        "full_name": "unsigned int _lzcnt_u32(unsigned int a);",
        "description": "Count the number of leading zero bits in unsigned 32-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_lzcnt_u64",
        "full_name": "unsigned __int64 _lzcnt_u64(unsigned __int64 a);",
        "description": "Count the number of leading zero bits in unsigned 64-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_or_si64",
        "full_name": "__m64 _mm_or_si64(__m64 a, __m64 b);",
        "description": "Compute the bitwise OR of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_m_pand",
        "full_name": "__m64 _m_pand(__m64 a, __m64 b);",
        "description": "Compute the bitwise AND of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_m_pandn",
        "full_name": "__m64 _m_pandn(__m64 a, __m64 b);",
        "description": "Compute the bitwise NOT of 64 bits (representing integer data) in \"a\" and then AND with \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_m_por",
        "full_name": "__m64 _m_por(__m64 a, __m64 b);",
        "description": "Compute the bitwise OR of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_m_pxor",
        "full_name": "__m64 _m_pxor(__m64 a, __m64 b);",
        "description": "Compute the bitwise XOR of 64 bits (representing integer data) in \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_rotl",
        "full_name": "unsigned int _rotl(unsigned int a, const int shift);",
        "description": "Shift the bits of unsigned 32-bit integer \"a\" left by the number of bits specified in \"shift\", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_rotr",
        "full_name": "unsigned int _rotr(unsigned int a, const int shift);",
        "description": "Shift the bits of unsigned 32-bit integer \"a\" right by the number of bits specified in \"shift\", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_rotwl",
        "full_name": "unsigned short _rotwl(unsigned short a, const int shift);",
        "description": "Shift the bits of unsigned 16-bit integer \"a\" left by the number of bits specified in \"shift\", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_rotwr",
        "full_name": "unsigned short _rotwr(unsigned short a, const int shift);",
        "description": "Shift the bits of unsigned 16-bit integer \"a\" right by the number of bits specified in \"shift\", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_mm512_reduce_and_epi32",
        "full_name": "int _mm512_reduce_and_epi32(__m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by bitwise AND. Returns the bitwise AND of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_and_epi32",
        "full_name": "int _mm512_mask_reduce_and_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by bitwise AND using mask \"k\". Returns the bitwise AND of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_and_epi64",
        "full_name": "__int64 _mm512_reduce_and_epi64(__m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by bitwise AND. Returns the bitwise AND of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_and_epi64",
        "full_name": "__int64 _mm512_mask_reduce_and_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by bitwise AND using mask \"k\". Returns the bitwise AND of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_or_epi32",
        "full_name": "int _mm512_reduce_or_epi32(__m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by bitwise OR. Returns the bitwise OR of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_or_epi32",
        "full_name": "int _mm512_mask_reduce_or_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by bitwise OR using mask \"k\". Returns the bitwise OR of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_or_epi64",
        "full_name": "__int64 _mm512_reduce_or_epi64(__m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by bitwise OR. Returns the bitwise OR of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_or_epi64",
        "full_name": "__int64 _mm512_mask_reduce_or_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by bitwise OR using mask \"k\". Returns the bitwise OR of all active elements in \"a\"."
    },
    {
        "name": "_mm512_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm512_cmplt_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmplt_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmplt_epi8_mask",
        "full_name": "__mmask16 _mm_cmplt_epi8_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmplt_epi8_mask",
        "full_name": "__mmask32 _mm256_cmplt_epi8_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmplt_epu16_mask",
        "full_name": "__mmask8 _mm_cmplt_epu16_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmplt_epu16_mask",
        "full_name": "__mmask16 _mm256_cmplt_epu16_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmplt_epu16_mask",
        "full_name": "__mmask32 _mm512_cmplt_epu16_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmplt_epu32_mask",
        "full_name": "__mmask8 _mm_cmplt_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmplt_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epu32_mask",
        "full_name": "__mmask8 _mm256_cmplt_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmplt_epu32_mask",
        "full_name": "__mmask16 _mm512_cmplt_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm_cmplt_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmplt_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmplt_epu16_mask(__mmask16 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmplt_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmplt_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm256_cmplt_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmplt_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmplt_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm256_cmplt_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm512_cmplt_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmplt_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmplt_epu8_mask",
        "full_name": "__mmask16 _mm_cmplt_epu8_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmplt_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmplt_epu8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epi32_mask",
        "full_name": "__mmask8 _mm_cmpeq_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmpeq_epi32_mask",
        "full_name": "__mmask8 _mm256_cmpeq_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmpneq_epi32_mask",
        "full_name": "__mmask8 _mm_cmpneq_epi32_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmpneq_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epi32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epi32_mask",
        "full_name": "__mmask8 _mm256_cmpneq_epi32_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmpeq_epu32_mask",
        "full_name": "__mmask8 _mm_cmpeq_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmplt_epu8_mask",
        "full_name": "__mmask32 _mm256_cmplt_epu8_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmplt_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmplt_epu8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmplt_epu8_mask",
        "full_name": "__mmask64 _mm512_cmplt_epu8_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmplt_epu8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmpneq_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpneq_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epu32_mask",
        "full_name": "__mmask8 _mm256_cmpeq_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpeq_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epu32_mask",
        "full_name": "__mmask16 _mm512_cmpeq_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpeq_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm_cmpeq_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm256_cmpeq_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpeq_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm512_cmpeq_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpeq_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpeq_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm512_cmpeq_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm512_cmpneq_epi64_mask(__m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpneq_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm256_cmpeq_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm256_cmpneq_epi64_mask(__m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpneq_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm_cmpeq_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm_cmpneq_epi64_mask(__m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epu32_mask",
        "full_name": "__mmask8 _mm_cmpneq_epu32_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epu32_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epu32_mask",
        "full_name": "__mmask8 _mm256_cmpneq_epu32_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpneq_epu32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_epu32_mask",
        "full_name": "__mmask16 _mm512_cmpneq_epu32_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpneq_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm_cmpneq_epu64_mask(__m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmpneq_epu64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm256_cmpneq_epu64_mask(__m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpneq_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm512_cmpneq_epu64_mask(__m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpneq_epu64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmplt_pd_mask",
        "full_name": "__mmask8 _mm512_cmplt_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmplt_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmplt_ps_mask",
        "full_name": "__mmask16 _mm512_cmplt_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmplt_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmplt_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpnle_pd_mask",
        "full_name": "__mmask8 _mm512_cmpnle_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpnle_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpnle_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpnle_ps_mask",
        "full_name": "__mmask16 _mm512_cmpnle_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpnle_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpnle_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than-or-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpnlt_pd_mask",
        "full_name": "__mmask8 _mm512_cmpnlt_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpnlt_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpnlt_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpnlt_ps_mask",
        "full_name": "__mmask16 _mm512_cmpnlt_ps_mask(__m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpnlt_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmpnlt_ps_mask(__mmask16 k1, __m512 a, __m512 b);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" for not-less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmpeq_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmpeq_epi32_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmpeq_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpeq_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmpeq_epi64_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmpeq_epi64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmpeq_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmpeq_epi64_mask(__mmask8 k1, __m512i a, __m512i b);",
        "description": "Compare packed 64-bit integers in \"a\" and \"b\" for equality, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmplt_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmplt_epi8_mask(__mmask16 k1, __m128i a, __m128i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmplt_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmplt_epi8_mask(__mmask32 k1, __m256i a, __m256i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmplt_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmplt_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmplt_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmplt_epu16_mask(__mmask8 k1, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmplt_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmplt_epu16_mask(__mmask32 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmplt_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmplt_epu32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmplt_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmplt_epu64_mask(__mmask8 k1, __m256i a, __m256i b);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" for less-than, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmpneq_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmpneq_epi8_mask(__mmask64 k1, __m512i a, __m512i b);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmpneq_pd_mask",
        "full_name": "__mmask8 _mm512_cmpneq_pd_mask(__m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmpneq_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmpneq_pd_mask(__mmask8 k1, __m512d a, __m512d b);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpackhi_epi64",
        "full_name": "__m128i _mm_mask_unpackhi_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_epi64",
        "full_name": "__m128i _mm_maskz_unpackhi_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_unpackhi_epi64",
        "full_name": "__m256i _mm256_mask_unpackhi_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_epi64",
        "full_name": "__m256i _mm256_maskz_unpackhi_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpackhi_epi64",
        "full_name": "__m256i _mm256_unpackhi_epi64(__m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_unpackhi_epi64",
        "full_name": "__m512i _mm512_unpackhi_epi64(__m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpackhi_epi64",
        "full_name": "__m512i _mm512_mask_unpackhi_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_epi64",
        "full_name": "__m512i _mm512_maskz_unpackhi_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpackhi_epi8",
        "full_name": "__m128i _mm_mask_unpackhi_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_epi8",
        "full_name": "__m128i _mm_maskz_unpackhi_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpackhi_pd",
        "full_name": "__m256d _mm256_unpackhi_pd(__m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpackhi_pd",
        "full_name": "__m256d _mm256_mask_unpackhi_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpackhi_ps",
        "full_name": "__m128 _mm_mask_unpackhi_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_ps",
        "full_name": "__m128 _mm_maskz_unpackhi_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpackhi_ps",
        "full_name": "__m256 _mm256_unpackhi_ps(__m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpackhi_ps",
        "full_name": "__m256 _mm256_mask_unpackhi_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_ps",
        "full_name": "__m256 _mm256_maskz_unpackhi_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpackhi_ps",
        "full_name": "__m512 _mm512_unpackhi_ps(__m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpackhi_ps",
        "full_name": "__m512 _mm512_mask_unpackhi_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_ps",
        "full_name": "__m512 _mm512_maskz_unpackhi_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_epi16",
        "full_name": "__m128i _mm_mask_unpacklo_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_epi16",
        "full_name": "__m128i _mm_maskz_unpacklo_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpacklo_epi16",
        "full_name": "__m256i _mm256_unpacklo_epi16(__m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpacklo_epi16",
        "full_name": "__m256i _mm256_mask_unpacklo_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_epi16",
        "full_name": "__m256i _mm256_maskz_unpacklo_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpacklo_epi16",
        "full_name": "__m512i _mm512_unpacklo_epi16(__m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpacklo_epi16",
        "full_name": "__m512i _mm512_mask_unpacklo_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_epi16",
        "full_name": "__m512i _mm512_maskz_unpacklo_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_epi32",
        "full_name": "__m128i _mm_mask_unpacklo_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_epi32",
        "full_name": "__m128i _mm_maskz_unpacklo_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpacklo_epi32",
        "full_name": "__m256i _mm256_unpacklo_epi32(__m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpacklo_epi32",
        "full_name": "__m256i _mm256_mask_unpacklo_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_epi32",
        "full_name": "__m256i _mm256_maskz_unpacklo_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpacklo_epi32",
        "full_name": "__m512i _mm512_unpacklo_epi32(__m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpacklo_epi32",
        "full_name": "__m512i _mm512_mask_unpacklo_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_epi32",
        "full_name": "__m512i _mm512_maskz_unpacklo_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_epi64",
        "full_name": "__m128i _mm_mask_unpacklo_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_epi64",
        "full_name": "__m128i _mm_maskz_unpacklo_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpacklo_epi64",
        "full_name": "__m256i _mm256_unpacklo_epi64(__m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpacklo_epi64",
        "full_name": "__m256i _mm256_mask_unpacklo_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_epi64",
        "full_name": "__m256i _mm256_maskz_unpacklo_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpacklo_epi64",
        "full_name": "__m512i _mm512_unpacklo_epi64(__m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpacklo_epi64",
        "full_name": "__m512i _mm512_mask_unpacklo_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_epi64",
        "full_name": "__m512i _mm512_maskz_unpacklo_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 64-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_epi8",
        "full_name": "__m128i _mm_mask_unpacklo_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_epi8",
        "full_name": "__m128i _mm_maskz_unpacklo_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_unpacklo_epi8",
        "full_name": "__m256i _mm256_mask_unpacklo_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_epi8",
        "full_name": "__m256i _mm256_maskz_unpacklo_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_unpacklo_epi8",
        "full_name": "__m512i _mm512_mask_unpacklo_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_epi8",
        "full_name": "__m512i _mm512_maskz_unpacklo_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpacklo_pd",
        "full_name": "__m128d _mm_mask_unpacklo_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpacklo_pd",
        "full_name": "__m128d _mm_maskz_unpacklo_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_unpacklo_pd",
        "full_name": "__m256d _mm256_unpacklo_pd(__m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpacklo_pd",
        "full_name": "__m256d _mm256_mask_unpacklo_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpacklo_pd",
        "full_name": "__m256d _mm256_maskz_unpacklo_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpacklo_pd",
        "full_name": "__m512d _mm512_unpacklo_pd(__m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpacklo_pd",
        "full_name": "__m512d _mm512_mask_unpacklo_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpacklo_pd",
        "full_name": "__m512d _mm512_maskz_unpacklo_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_unpackhi_epi8",
        "full_name": "__m256i _mm256_mask_unpackhi_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_epi8",
        "full_name": "__m256i _mm256_maskz_unpackhi_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_unpackhi_epi8",
        "full_name": "__m512i _mm512_mask_unpackhi_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_epi8",
        "full_name": "__m512i _mm512_maskz_unpackhi_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 8-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpackhi_pd",
        "full_name": "__m128d _mm_mask_unpackhi_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_pd",
        "full_name": "__m128d _mm_maskz_unpackhi_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_pd",
        "full_name": "__m256d _mm256_maskz_unpackhi_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_unpackhi_pd",
        "full_name": "__m512d _mm512_mask_unpackhi_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_pd",
        "full_name": "__m512d _mm512_maskz_unpackhi_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atan_pd",
        "full_name": "__m128d _mm_atan_pd(__m128d a);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_atan_pd",
        "full_name": "__m256d _mm256_atan_pd(__m256d a);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_atan_pd",
        "full_name": "__m512d _mm512_atan_pd(__m512d a);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atan_pd",
        "full_name": "__m512d _mm512_mask_atan_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" expressed in radians using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atan_ps",
        "full_name": "__m128 _mm_atan_ps(__m128 a);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_atan_ps",
        "full_name": "__m256 _mm256_atan_ps(__m256 a);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_atan_ps",
        "full_name": "__m512 _mm512_atan_ps(__m512 a);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atan_ps",
        "full_name": "__m512 _mm512_mask_atan_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atan2_pd",
        "full_name": "__m128d _mm_atan2_pd(__m128d a, __m128d b);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm256_atan2_pd",
        "full_name": "__m256d _mm256_atan2_pd(__m256d a, __m256d b);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_atan2_pd",
        "full_name": "__m512d _mm512_atan2_pd(__m512d a, __m512d b);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atan2_pd",
        "full_name": "__m512d _mm512_mask_atan2_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the inverse tangent of packed double-precision (64-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atan2_ps",
        "full_name": "__m128 _mm_atan2_ps(__m128 a, __m128 b);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm256_atan2_ps",
        "full_name": "__m256 _mm256_atan2_ps(__m256 a, __m256 b);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_atan2_ps",
        "full_name": "__m512 _mm512_atan2_ps(__m512 a, __m512 b);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atan2_ps",
        "full_name": "__m512 _mm512_mask_atan2_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the inverse tangent of packed single-precision (32-bit) floating-point elements in \"a\" divided by packed elements in \"b\", and store the results in \"dst\" expressed in radians using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_inserti32x4",
        "full_name": "__m512i _mm512_inserti32x4(__m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_storeu_epi16",
        "full_name": "void _mm_mask_cvtsepi64_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_storeu_epi16",
        "full_name": "void _mm256_mask_cvtsepi64_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_storeu_epi16",
        "full_name": "void _mm512_mask_cvtsepi64_storeu_epi16(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_storeu_epi32",
        "full_name": "void _mm_mask_cvtsepi64_storeu_epi32(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_storeu_epi32",
        "full_name": "void _mm256_mask_cvtsepi64_storeu_epi32(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_storeu_epi32",
        "full_name": "void _mm512_mask_cvtsepi64_storeu_epi32(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_storeu_epi8",
        "full_name": "void _mm_mask_cvtsepi64_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_storeu_epi8",
        "full_name": "void _mm256_mask_cvtsepi64_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_storeu_epi8",
        "full_name": "void _mm512_mask_cvtsepi64_storeu_epi8(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtsepi32_storeu_epi16",
        "full_name": "void _mm_mask_cvtsepi32_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi32_storeu_epi16",
        "full_name": "void _mm256_mask_cvtsepi32_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi32_storeu_epi16",
        "full_name": "void _mm512_mask_cvtsepi32_storeu_epi16(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtsepi32_storeu_epi8",
        "full_name": "void _mm_mask_cvtsepi32_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtsepi32_storeu_epi8",
        "full_name": "void _mm256_mask_cvtsepi32_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtsepi32_storeu_epi8",
        "full_name": "void _mm512_mask_cvtsepi32_storeu_epi8(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_cvtsepi32_epi8",
        "full_name": "__m128i _mm512_cvtsepi32_epi8(__m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi32_epi8",
        "full_name": "__m128i _mm512_mask_cvtsepi32_epi8(__m128i src, __mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi32_epi8",
        "full_name": "__m128i _mm512_maskz_cvtsepi32_epi8(__mmask16 k, __m512i a);",
        "description": "Convert packed signed 32-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtsepi64_epi16",
        "full_name": "__m128i _mm_cvtsepi64_epi16(__m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_epi16",
        "full_name": "__m128i _mm_mask_cvtsepi64_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi64_epi16",
        "full_name": "__m128i _mm_maskz_cvtsepi64_epi16(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi64_epi16",
        "full_name": "__m128i _mm256_cvtsepi64_epi16(__m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_epi16",
        "full_name": "__m128i _mm256_mask_cvtsepi64_epi16(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi64_epi16",
        "full_name": "__m128i _mm256_maskz_cvtsepi64_epi16(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtsepi64_epi16",
        "full_name": "__m128i _mm512_cvtsepi64_epi16(__m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_epi16",
        "full_name": "__m128i _mm512_mask_cvtsepi64_epi16(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi64_epi16",
        "full_name": "__m128i _mm512_maskz_cvtsepi64_epi16(__mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 16-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtsepi64_epi32",
        "full_name": "__m128i _mm_cvtsepi64_epi32(__m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_epi32",
        "full_name": "__m128i _mm_mask_cvtsepi64_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi64_epi32",
        "full_name": "__m128i _mm_maskz_cvtsepi64_epi32(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi64_epi32",
        "full_name": "__m128i _mm256_cvtsepi64_epi32(__m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_epi32",
        "full_name": "__m128i _mm256_mask_cvtsepi64_epi32(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi64_epi32",
        "full_name": "__m128i _mm256_maskz_cvtsepi64_epi32(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtsepi64_epi32",
        "full_name": "__m256i _mm512_cvtsepi64_epi32(__m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_epi32",
        "full_name": "__m256i _mm512_mask_cvtsepi64_epi32(__m256i src, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi64_epi32",
        "full_name": "__m256i _mm512_maskz_cvtsepi64_epi32(__mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 32-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtsepi64_epi8",
        "full_name": "__m128i _mm_cvtsepi64_epi8(__m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_cvtsepi64_epi8",
        "full_name": "__m128i _mm_mask_cvtsepi64_epi8(__m128i src, __mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_cvtsepi64_epi8",
        "full_name": "__m128i _mm_maskz_cvtsepi64_epi8(__mmask8 k, __m128i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cvtsepi64_epi8",
        "full_name": "__m128i _mm256_cvtsepi64_epi8(__m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_cvtsepi64_epi8",
        "full_name": "__m128i _mm256_mask_cvtsepi64_epi8(__m128i src, __mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_cvtsepi64_epi8",
        "full_name": "__m128i _mm256_maskz_cvtsepi64_epi8(__mmask8 k, __m256i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cvtsepi64_epi8",
        "full_name": "__m128i _mm512_cvtsepi64_epi8(__m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cvtsepi64_epi8",
        "full_name": "__m128i _mm512_mask_cvtsepi64_epi8(__m128i src, __mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_cvtsepi64_epi8",
        "full_name": "__m128i _mm512_maskz_cvtsepi64_epi8(__mmask8 k, __m512i a);",
        "description": "Convert packed signed 64-bit integers in \"a\" to packed 8-bit integers with signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cvtss_sd",
        "full_name": "__m128d _mm_mask_cvtss_sd(__m128d src, __mmask8 k, __m128d a, __m128 b);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_cvtss_sd",
        "full_name": "__m128d _mm_maskz_cvtss_sd(__mmask8 k, __m128d a, __m128 b);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm256_inserti32x4",
        "full_name": "__m256i _mm256_inserti32x4(__m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_mask_inserti32x4",
        "full_name": "__m256i _mm256_mask_inserti32x4(__m256i src, __mmask8 k, __m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_inserti32x4",
        "full_name": "__m256i _mm256_maskz_inserti32x4(__mmask8 k, __m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_inserti32x4",
        "full_name": "__m512i _mm512_mask_inserti32x4(__m512i src, __mmask16 k, __m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_inserti32x4",
        "full_name": "__m512i _mm512_maskz_inserti32x4(__mmask16 k, __m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 4 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_inserti64x2",
        "full_name": "__m256i _mm256_inserti64x2(__m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm256_mask_inserti64x2",
        "full_name": "__m256i _mm256_mask_inserti64x2(__m256i src, __mmask8 k, __m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_inserti64x2",
        "full_name": "__m256i _mm256_maskz_inserti64x2(__mmask8 k, __m256i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_inserti64x2",
        "full_name": "__m512i _mm512_inserti64x2(__m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"dst\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"dst\" at the location specified by \"imm8\"."
    },
    {
        "name": "_mm512_mask_inserti64x2",
        "full_name": "__m512i _mm512_mask_inserti64x2(__m512i src, __mmask8 k, __m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_inserti64x2",
        "full_name": "__m512i _mm512_maskz_inserti64x2(__mmask8 k, __m512i a, __m128i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 128 bits (composed of 2 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_inserti64x4",
        "full_name": "__m512i _mm512_mask_inserti64x4(__m512i src, __mmask8 k, __m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 4 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_inserti64x4",
        "full_name": "__m512i _mm512_maskz_inserti64x4(__mmask8 k, __m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 4 packed 64-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_inserti32x8",
        "full_name": "__m512i _mm512_mask_inserti32x8(__m512i src, __mmask16 k, __m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 8 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_inserti32x8",
        "full_name": "__m512i _mm512_maskz_inserti32x8(__mmask16 k, __m512i a, __m256i b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 8 packed 32-bit integers) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_insertf64x4",
        "full_name": "__m512d _mm512_maskz_insertf64x4(__mmask8 k, __m512d a, __m256d b, int imm8);",
        "description": "Copy \"a\" to \"tmp\", then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"b\" into \"tmp\" at the location specified by \"imm8\".  Store \"tmp\" to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpackhi_pd",
        "full_name": "__m512d _mm512_unpackhi_pd(__m512d a, __m512d b);",
        "description": "Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cmpneq_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmpneq_epi32_mask(__mmask16 k1, __m512i a, __m512i b);",
        "description": "Compare packed 32-bit integers in \"a\" and \"b\" for not-equal, and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atanh_pd",
        "full_name": "__m128d _mm_atanh_pd(__m128d a);",
        "description": "Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_atanh_pd",
        "full_name": "__m256d _mm256_atanh_pd(__m256d a);",
        "description": "Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_atanh_pd",
        "full_name": "__m512d _mm512_atanh_pd(__m512d a);",
        "description": "Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atanh_pd",
        "full_name": "__m512d _mm512_mask_atanh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" expressed in radians using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_atanh_ps",
        "full_name": "__m128 _mm_atanh_ps(__m128 a);",
        "description": "Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_atanh_ps",
        "full_name": "__m256 _mm256_atanh_ps(__m256 a);",
        "description": "Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_atanh_ps",
        "full_name": "__m512 _mm512_atanh_ps(__m512 a);",
        "description": "Compute the inverse hyperblic tangent of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" expressed in radians."
    },
    {
        "name": "_mm512_mask_atanh_ps",
        "full_name": "__m512 _mm512_mask_atanh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cvtss_si32",
        "full_name": "int _mm_cvtss_si32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtss_si64",
        "full_name": "__int64 _mm_cvtss_si64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtss_u32",
        "full_name": "unsigned int _mm_cvtss_u32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtss_u64",
        "full_name": "unsigned __int64 _mm_cvtss_u64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtt_ps2pi",
        "full_name": "__m64 _mm_cvtt_ps2pi(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtss_i32",
        "full_name": "int _mm_cvtss_i32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtss_i64",
        "full_name": "__int64 _mm_cvtss_i64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm512_int2mask",
        "full_name": "__mmask16 _mm512_int2mask(int mask);",
        "description": "Converts integer \"mask\" into bitmask, storing the result in \"dst\"."
    },
    {
        "name": "_mm512_kmov",
        "full_name": "__mmask16 _mm512_kmov(__mmask16 a);",
        "description": "Copy 16-bit mask \"a\" to \"k\"."
    },
    {
        "name": "_mm512_kunpackb",
        "full_name": "__mmask16 _mm512_kunpackb(__mmask16 a, __mmask16 b);",
        "description": "Unpack and interleave 8 bits from masks \"a\" and \"b\", and store the 16-bit result in \"k\"."
    },
    {
        "name": "_mm512_kunpackw",
        "full_name": "__mmask32 _mm512_kunpackw(__mmask32 a, __mmask32 b);",
        "description": "Unpack and interleave 16 bits from masks \"a\" and \"b\", and store the 32-bit result in \"dst\"."
    },
    {
        "name": "_mm512_kunpackd",
        "full_name": "__mmask64 _mm512_kunpackd(__mmask64 a, __mmask64 b);",
        "description": "Unpack and interleave 32 bits from masks \"a\" and \"b\", and store the 64-bit result in \"dst\"."
    },
    {
        "name": "_mm_mask_max_epu32",
        "full_name": "__m128i _mm_mask_max_epu32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_max_epu32",
        "full_name": "__m128i _mm_maskz_max_epu32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed maximum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_min_epu32",
        "full_name": "__m128i _mm_mask_min_epu32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_min_epu32",
        "full_name": "__m128i _mm_maskz_min_epu32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\", and store packed minimum values in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_movdir64b",
        "full_name": "void _movdir64b(void *dst, const void *src);",
        "description": "Move 64-byte (512-bit) value using direct store from source memory address \"src\" to destination memory address \"dst\"."
    },
    {
        "name": "_mm256_movemask_pd",
        "full_name": "int _mm256_movemask_pd(__m256d a);",
        "description": "Set each bit of mask \"dst\" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in \"a\"."
    },
    {
        "name": "_mm_movepi8_mask",
        "full_name": "__mmask16 _mm_movepi8_mask(__m128i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 8-bit integer in \"a\"."
    },
    {
        "name": "_mm256_movepi8_mask",
        "full_name": "__mmask32 _mm256_movepi8_mask(__m256i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 8-bit integer in \"a\"."
    },
    {
        "name": "_mm512_movepi8_mask",
        "full_name": "__mmask64 _mm512_movepi8_mask(__m512i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 8-bit integer in \"a\"."
    },
    {
        "name": "_mm_movepi16_mask",
        "full_name": "__mmask8 _mm_movepi16_mask(__m128i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 16-bit integer in \"a\"."
    },
    {
        "name": "_mm256_movepi16_mask",
        "full_name": "__mmask16 _mm256_movepi16_mask(__m256i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 16-bit integer in \"a\"."
    },
    {
        "name": "_mm512_movepi16_mask",
        "full_name": "__mmask32 _mm512_movepi16_mask(__m512i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 16-bit integer in \"a\"."
    },
    {
        "name": "_mm_movepi32_mask",
        "full_name": "__mmask8 _mm_movepi32_mask(__m128i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 32-bit integer in \"a\"."
    },
    {
        "name": "_mm256_movepi32_mask",
        "full_name": "__mmask8 _mm256_movepi32_mask(__m256i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 32-bit integer in \"a\"."
    },
    {
        "name": "_mm512_movepi32_mask",
        "full_name": "__mmask16 _mm512_movepi32_mask(__m512i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 32-bit integer in \"a\"."
    },
    {
        "name": "_mm_movepi64_mask",
        "full_name": "__mmask8 _mm_movepi64_mask(__m128i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 64-bit integer in \"a\"."
    },
    {
        "name": "_mm256_movepi64_mask",
        "full_name": "__mmask8 _mm256_movepi64_mask(__m256i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 64-bit integer in \"a\"."
    },
    {
        "name": "_mm512_movepi64_mask",
        "full_name": "__mmask8 _mm512_movepi64_mask(__m512i a);",
        "description": "Set each bit of mask register \"k\" based on the most significant bit of the corresponding packed 64-bit integer in \"a\"."
    },
    {
        "name": "_mm_mask_mul_epi32",
        "full_name": "__m128i _mm_mask_mul_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mul_epi32",
        "full_name": "__m128i _mm_maskz_mul_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mul_epi32",
        "full_name": "__m256i _mm256_mask_mul_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mul_epi32",
        "full_name": "__m256i _mm256_maskz_mul_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mul_epi32",
        "full_name": "__m512i _mm512_mask_mul_epi32(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mul_epi32",
        "full_name": "__m512i _mm512_maskz_mul_epi32(__mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the low signed 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the signed 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mul_epu32",
        "full_name": "__m128i _mm_mask_mul_epu32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mul_epu32",
        "full_name": "__m128i _mm_maskz_mul_epu32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mul_epu32",
        "full_name": "__m256i _mm256_mask_mul_epu32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mul_epu32",
        "full_name": "__m256i _mm256_maskz_mul_epu32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mul_epu32",
        "full_name": "__m512i _mm512_mask_mul_epu32(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mul_epu32",
        "full_name": "__m512i _mm512_maskz_mul_epu32(__mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the low unsigned 32-bit integers from each packed 64-bit element in \"a\" and \"b\", and store the unsigned 64-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mulhi_epi16",
        "full_name": "__m128i _mm_mask_mulhi_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mulhi_epi16",
        "full_name": "__m128i _mm_maskz_mulhi_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mulhi_epi16",
        "full_name": "__m256i _mm256_mask_mulhi_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mulhi_epi16",
        "full_name": "__m256i _mm256_maskz_mulhi_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mulhi_epi16",
        "full_name": "__m512i _mm512_mask_mulhi_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mulhi_epi16",
        "full_name": "__m512i _mm512_maskz_mulhi_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed signed 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_mulhi_epu16",
        "full_name": "__m128i _mm_mask_mulhi_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mulhi_epu16",
        "full_name": "__m128i _mm_maskz_mulhi_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mulhi_epu16",
        "full_name": "__m256i _mm256_mask_mulhi_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mulhi_epu16",
        "full_name": "__m256i _mm256_maskz_mulhi_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mulhi_epu16",
        "full_name": "__m512i _mm512_mask_mulhi_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mulhi_epu16",
        "full_name": "__m512i _mm512_maskz_mulhi_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply the packed unsigned 16-bit integers in \"a\" and \"b\", producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mullo_epi64",
        "full_name": "__m128i _mm_mullo_epi64(__m128i a, __m128i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\"."
    },
    {
        "name": "_mm_mask_mullo_epi64",
        "full_name": "__m128i _mm_mask_mullo_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mullo_epi64",
        "full_name": "__m128i _mm_maskz_mullo_epi64(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mullo_epi64",
        "full_name": "__m256i _mm256_mask_mullo_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mullo_epi64",
        "full_name": "__m256i _mm256_maskz_mullo_epi64(__mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mullo_epi64",
        "full_name": "__m512i _mm512_mask_mullo_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mullo_epi64",
        "full_name": "__m512i _mm512_maskz_mullo_epi64(__mmask8 k, __m512i a, __m512i b);",
        "description": "Multiply the packed 64-bit integers in \"a\" and \"b\", producing intermediate 128-bit integers, and store the low 64 bits of the intermediate integers in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mulx_u32",
        "full_name": "unsigned int _mulx_u32(unsigned int a, unsigned int b, unsigned int *hi);",
        "description": "Multiply unsigned 32-bit integers \"a\" and \"b\", store the low 32-bits of the result in \"dst\", and store the high 32-bits in \"hi\". This does not read or write arithmetic flags."
    },
    {
        "name": "_mulx_u64",
        "full_name": "unsigned __int64 _mulx_u64(unsigned __int64 a, unsigned __int64 b, unsigned __int64 *hi);",
        "description": "Multiply unsigned 64-bit integers \"a\" and \"b\", store the low 64-bits of the result in \"dst\", and store the high 64-bits in \"hi\". This does not read or write arithmetic flags."
    },
    {
        "name": "_popcnt32",
        "full_name": "int _popcnt32(int a);",
        "description": "Count the number of bits set to 1 in 32-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_popcnt64",
        "full_name": "int _popcnt64(__int64 a);",
        "description": "Count the number of bits set to 1 in 64-bit integer \"a\", and return that count in \"dst\"."
    },
    {
        "name": "_mm_pow_pd",
        "full_name": "__m128d _mm_pow_pd(__m128d a, __m128d b);",
        "description": "Compute the exponential value of packed double-precision (64-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_pow_pd",
        "full_name": "__m256d _mm256_pow_pd(__m256d a, __m256d b);",
        "description": "Compute the exponential value of packed double-precision (64-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_pow_pd",
        "full_name": "__m512d _mm512_pow_pd(__m512d a, __m512d b);",
        "description": "Compute the exponential value of packed double-precision (64-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_pow_pd",
        "full_name": "__m512d _mm512_mask_pow_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Compute the exponential value of packed double-precision (64-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_pow_ps",
        "full_name": "__m128 _mm_pow_ps(__m128 a, __m128 b);",
        "description": "Compute the exponential value of packed single-precision (32-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_pow_ps",
        "full_name": "__m256 _mm256_pow_ps(__m256 a, __m256 b);",
        "description": "Compute the exponential value of packed single-precision (32-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_pow_ps",
        "full_name": "__m512 _mm512_pow_ps(__m512 a, __m512 b);",
        "description": "Compute the exponential value of packed single-precision (32-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_pow_ps",
        "full_name": "__m512 _mm512_mask_pow_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Compute the exponential value of packed single-precision (32-bit) floating-point elements in \"a\" raised by packed elements in \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_reduce_mul_epi32",
        "full_name": "int _mm512_reduce_mul_epi32(__m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by multiplication. Returns the product of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_mul_epi32",
        "full_name": "int _mm512_mask_reduce_mul_epi32(__mmask16 k, __m512i a);",
        "description": "Reduce the packed 32-bit integers in \"a\" by multiplication using mask \"k\". Returns the product of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_mul_epi64",
        "full_name": "__int64 _mm512_reduce_mul_epi64(__m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by multiplication. Returns the product of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_mul_epi64",
        "full_name": "__int64 _mm512_mask_reduce_mul_epi64(__mmask8 k, __m512i a);",
        "description": "Reduce the packed 64-bit integers in \"a\" by multiplication using mask \"k\". Returns the product of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_mul_ps",
        "full_name": "float _mm512_reduce_mul_ps(__m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by multiplication. Returns the product of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_mul_ps",
        "full_name": "float _mm512_mask_reduce_mul_ps(__mmask16 k, __m512 a);",
        "description": "Reduce the packed single-precision (32-bit) floating-point elements in \"a\" by multiplication using mask \"k\". Returns the product of all active elements in \"a\"."
    },
    {
        "name": "_mm512_reduce_mul_pd",
        "full_name": "double _mm512_reduce_mul_pd(__m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by multiplication. Returns the product of all elements in \"a\"."
    },
    {
        "name": "_mm512_mask_reduce_mul_pd",
        "full_name": "double _mm512_mask_reduce_mul_pd(__mmask8 k, __m512d a);",
        "description": "Reduce the packed double-precision (64-bit) floating-point elements in \"a\" by multiplication using mask \"k\". Returns the product of all active elements in \"a\"."
    },
    {
        "name": "_mm256_set_m128",
        "full_name": "__m256 _mm256_set_m128(__m128 hi, __m128 lo);",
        "description": "Set packed __m256 vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_set_m128d",
        "full_name": "__m256d _mm256_set_m128d(__m128d hi, __m128d lo);",
        "description": "Set packed __m256d vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm_mask_set1_epi16",
        "full_name": "__m128i _mm_mask_set1_epi16(__m128i src, __mmask8 k, short a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_set1_epi16",
        "full_name": "__m128i _mm_maskz_set1_epi16(__mmask8 k, short a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_set1_epi16",
        "full_name": "__m256i _mm256_mask_set1_epi16(__m256i src, __mmask16 k, short a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_set1_epi16",
        "full_name": "__m256i _mm256_maskz_set1_epi16(__mmask16 k, short a);",
        "description": "Broadcast 16-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_set1_epi16",
        "full_name": "__m256i _mm256_set1_epi16(short a);",
        "description": "Broadcast 16-bit integer \"a\" to all all elements of \"dst\". This intrinsic may generate the \"vpbroadcastw\"."
    },
    {
        "name": "_mm512_set1_epi16",
        "full_name": "__m512i _mm512_set1_epi16(short a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all all elements of \"dst\"."
    },
    {
        "name": "_mm512_mask_set1_epi16",
        "full_name": "__m512i _mm512_mask_set1_epi16(__m512i src, __mmask32 k, short a);",
        "description": "Broadcast 16-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_set1_epi16",
        "full_name": "__m512i _mm512_maskz_set1_epi16(__mmask32 k, short a);",
        "description": "Broadcast the low packed 16-bit integer from \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_set1_epi8",
        "full_name": "__m128i _mm_mask_set1_epi8(__m128i src, __mmask16 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_set1_epi8",
        "full_name": "__m128i _mm_maskz_set1_epi8(__mmask16 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_set1_epi8",
        "full_name": "__m256i _mm256_mask_set1_epi8(__m256i src, __mmask32 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_set1_epi8",
        "full_name": "__m256i _mm256_maskz_set1_epi8(__mmask32 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_set1_epi8",
        "full_name": "__m512i _mm512_mask_set1_epi8(__m512i src, __mmask64 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_set1_epi8",
        "full_name": "__m512i _mm512_maskz_set1_epi8(__mmask64 k, char a);",
        "description": "Broadcast 8-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_set1_epi32",
        "full_name": "__m128i _mm_mask_set1_epi32(__m128i src, __mmask8 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_set1_epi32",
        "full_name": "__m128i _mm_maskz_set1_epi32(__mmask8 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_set1_epi32",
        "full_name": "__m256i _mm256_mask_set1_epi32(__m256i src, __mmask8 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_set1_epi32",
        "full_name": "__m256i _mm256_maskz_set1_epi32(__mmask8 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_set1_epi32",
        "full_name": "__m512i _mm512_mask_set1_epi32(__m512i src, __mmask16 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_set1_epi32",
        "full_name": "__m512i _mm512_maskz_set1_epi32(__mmask16 k, int a);",
        "description": "Broadcast 32-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_set1_epi64",
        "full_name": "__m128i _mm_mask_set1_epi64(__m128i src, __mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_set1_epi64",
        "full_name": "__m128i _mm_maskz_set1_epi64(__mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_set1_epi64",
        "full_name": "__m256i _mm256_mask_set1_epi64(__m256i src, __mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_set1_epi64",
        "full_name": "__m256i _mm256_maskz_set1_epi64(__mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_set1_epi64",
        "full_name": "__m512i _mm512_mask_set1_epi64(__m512i src, __mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_set1_epi64",
        "full_name": "__m512i _mm512_maskz_set1_epi64(__mmask8 k, __int64 a);",
        "description": "Broadcast 64-bit integer \"a\" to all elements of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_set4_epi32",
        "full_name": "__m512i _mm512_set4_epi32(int d, int c, int b, int a);",
        "description": "Set packed 32-bit integers in \"dst\" with the repeated 4 element sequence."
    },
    {
        "name": "_mm512_set4_epi64",
        "full_name": "__m512i _mm512_set4_epi64(__int64 d, __int64 c, __int64 b, __int64 a);",
        "description": "Set packed 64-bit integers in \"dst\" with the repeated 4 element sequence."
    },
    {
        "name": "_mm512_set4_pd",
        "full_name": "__m512d _mm512_set4_pd(double d, double c, double b, double a);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the repeated 4 element sequence."
    },
    {
        "name": "_mm512_set4_ps",
        "full_name": "__m512 _mm512_set4_ps(float d, float c, float b, float a);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the repeated 4 element sequence."
    },
    {
        "name": "_mm256_setr_m128",
        "full_name": "__m256 _mm256_setr_m128(__m128 lo, __m128 hi);",
        "description": "Set packed __m256 vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm256_setr_m128d",
        "full_name": "__m256d _mm256_setr_m128d(__m128d lo, __m128d hi);",
        "description": "Set packed __m256d vector \"dst\" with the supplied values."
    },
    {
        "name": "_mm512_setr4_epi32",
        "full_name": "__m512i _mm512_setr4_epi32(int d, int c, int b, int a);",
        "description": "Set packed 32-bit integers in \"dst\" with the repeated 4 element sequence in reverse order."
    },
    {
        "name": "_mm512_setr4_epi64",
        "full_name": "__m512i _mm512_setr4_epi64(__int64 d, __int64 c, __int64 b, __int64 a);",
        "description": "Set packed 64-bit integers in \"dst\" with the repeated 4 element sequence in reverse order."
    },
    {
        "name": "_mm512_setr4_pd",
        "full_name": "__m512d _mm512_setr4_pd(double d, double c, double b, double a);",
        "description": "Set packed double-precision (64-bit) floating-point elements in \"dst\" with the repeated 4 element sequence in reverse order."
    },
    {
        "name": "_mm512_setr4_ps",
        "full_name": "__m512 _mm512_setr4_ps(float d, float c, float b, float a);",
        "description": "Set packed single-precision (32-bit) floating-point elements in \"dst\" with the repeated 4 element sequence in reverse order."
    },
    {
        "name": "_mm256_extract_epi8",
        "full_name": "int _mm256_extract_epi8(__m256i a, const int index);",
        "description": "Extract an 8-bit integer from \"a\", selected with \"index\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extract_epi16",
        "full_name": "int _mm256_extract_epi16(__m256i a, const int index);",
        "description": "Extract a 16-bit integer from \"a\", selected with \"index\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extract_epi32",
        "full_name": "__int32 _mm256_extract_epi32(__m256i a, const int index);",
        "description": "Extract a 32-bit integer from \"a\", selected with \"index\", and store the result in \"dst\"."
    },
    {
        "name": "_mm256_extract_epi64",
        "full_name": "__int64 _mm256_extract_epi64(__m256i a, const int index);",
        "description": "Extract a 64-bit integer from \"a\", selected with \"index\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_storeu_epi32",
        "full_name": "void _mm_storeu_epi32(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 4 packed 32-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_sll_epi64",
        "full_name": "__m512i _mm512_sll_epi64(__m512i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" left by \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_lddqu_si128",
        "full_name": "__m128i _mm_lddqu_si128(__m128i const * mem_addr);",
        "description": "Load 128-bits of integer data from unaligned memory into \"dst\". This intrinsic may perform better than \"_mm_loadu_si128\" when the data crosses a cache line boundary."
    },
    {
        "name": "_mm256_lddqu_si256",
        "full_name": "__m256i _mm256_lddqu_si256(__m256i const * mem_addr);",
        "description": "Load 256-bits of integer data from unaligned memory into \"dst\". This intrinsic may perform better than \"_mm256_loadu_si256\" when the data crosses a cache line boundary."
    },
    {
        "name": "_mm_mask_load_epi32",
        "full_name": "__m128i _mm_mask_load_epi32(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_epi32",
        "full_name": "__m128i _mm_maskz_load_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_load_epi32",
        "full_name": "__m256i _mm256_mask_load_epi32(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_maskz_load_epi32",
        "full_name": "__m256i _mm256_maskz_load_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_load_epi32",
        "full_name": "__m512i _mm512_mask_load_epi32(__m512i src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_maskz_load_epi32",
        "full_name": "__m512i _mm512_maskz_load_epi32(__mmask16 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_load_epi64",
        "full_name": "__m128i _mm_mask_load_epi64(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_epi64",
        "full_name": "__m128i _mm_maskz_load_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_load_epi64",
        "full_name": "__m256i _mm256_mask_load_epi64(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_maskz_load_epi64",
        "full_name": "__m256i _mm256_maskz_load_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_load_epi64",
        "full_name": "__m512i _mm512_mask_load_epi64(__m512i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_maskz_load_epi64",
        "full_name": "__m512i _mm512_maskz_load_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_load_mask8",
        "full_name": "__mmask8 _load_mask8(__mmask8 *mem_addr);",
        "description": "Load 8-bit mask from memory into \"k\"."
    },
    {
        "name": "_load_mask16",
        "full_name": "__mmask16 _load_mask16(__mmask16 *mem_addr);",
        "description": "Load 16-bit mask from memory into \"k\"."
    },
    {
        "name": "_load_mask32",
        "full_name": "__mmask32 _load_mask32(__mmask32 *mem_addr);",
        "description": "Load 32-bit mask from memory into \"k\"."
    },
    {
        "name": "_load_mask64",
        "full_name": "__mmask64 _load_mask64(__mmask64 *mem_addr);",
        "description": "Load 64-bit mask from memory into \"k\"."
    },
    {
        "name": "_mm_mask_load_pd",
        "full_name": "__m128d _mm_mask_load_pd(__m128d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_pd",
        "full_name": "__m128d _mm_maskz_load_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_load_pd",
        "full_name": "__m256d _mm256_mask_load_pd(__m256d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_maskz_load_pd",
        "full_name": "__m256d _mm256_maskz_load_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_load_pd",
        "full_name": "__m512d _mm512_mask_load_pd(__m512d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_maskz_load_pd",
        "full_name": "__m512d _mm512_maskz_load_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_load_ps",
        "full_name": "__m128 _mm_mask_load_ps(__m128 src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_ps",
        "full_name": "__m128 _mm_maskz_load_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_load_ps",
        "full_name": "__m256 _mm256_mask_load_ps(__m256 src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_maskz_load_ps",
        "full_name": "__m256 _mm256_maskz_load_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_load_ps",
        "full_name": "__m512 _mm512_mask_load_ps(__m512 src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_maskz_load_ps",
        "full_name": "__m512 _mm512_maskz_load_ps(__mmask16 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_loaddup_pd",
        "full_name": "__m128d _mm_loaddup_pd(double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into both elements of \"dst\"."
    },
    {
        "name": "_mm_loadu_epi16",
        "full_name": "__m128i _mm_loadu_epi16(void const * mem_addr);",
        "description": "Load 128-bits (composed of 8 packed 16-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_epi16",
        "full_name": "__m128i _mm_mask_loadu_epi16(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_epi16",
        "full_name": "__m128i _mm_maskz_loadu_epi16(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_epi16",
        "full_name": "__m256i _mm256_loadu_epi16(void const * mem_addr);",
        "description": "Load 256-bits (composed of 16 packed 16-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_epi16",
        "full_name": "__m256i _mm256_mask_loadu_epi16(__m256i src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_epi16",
        "full_name": "__m256i _mm256_maskz_loadu_epi16(__mmask16 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_loadu_epi16",
        "full_name": "__m512i _mm512_loadu_epi16(void const * mem_addr);",
        "description": "Load 512-bits (composed of 32 packed 16-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_loadu_epi16",
        "full_name": "__m512i _mm512_mask_loadu_epi16(__m512i src, __mmask32 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_maskz_loadu_epi16",
        "full_name": "__m512i _mm512_maskz_loadu_epi16(__mmask32 k, void const * mem_addr);",
        "description": "Load packed 16-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_epi32",
        "full_name": "__m128i _mm_mask_loadu_epi32(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_epi32",
        "full_name": "__m128i _mm_maskz_loadu_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_epi32",
        "full_name": "__m256i _mm256_loadu_epi32(void const * mem_addr);",
        "description": "Load 256-bits (composed of 8 packed 32-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_epi32",
        "full_name": "__m256i _mm256_mask_loadu_epi32(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_epi32",
        "full_name": "__m256i _mm256_maskz_loadu_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_loadu_epi32",
        "full_name": "__m512i _mm512_loadu_epi32(void const * mem_addr);",
        "description": "Load 512-bits (composed of 16 packed 32-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_loadu_epi32",
        "full_name": "__m512i _mm512_mask_loadu_epi32(__m512i src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_loadu_epi64",
        "full_name": "__m128i _mm_loadu_epi64(void const * mem_addr);",
        "description": "Load 128-bits (composed of 2 packed 64-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_epi64",
        "full_name": "__m128i _mm_mask_loadu_epi64(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_epi64",
        "full_name": "__m128i _mm_maskz_loadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_epi64",
        "full_name": "__m256i _mm256_loadu_epi64(void const * mem_addr);",
        "description": "Load 256-bits (composed of 4 packed 64-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_epi64",
        "full_name": "__m256i _mm256_mask_loadu_epi64(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_epi64",
        "full_name": "__m256i _mm256_maskz_loadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_loadu_epi64",
        "full_name": "__m512i _mm512_mask_loadu_epi64(__m512i src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_maskz_loadu_epi64",
        "full_name": "__m512i _mm512_maskz_loadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_loadu_epi8",
        "full_name": "__m128i _mm_loadu_epi8(void const * mem_addr);",
        "description": "Load 128-bits (composed of 16 packed 8-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_epi8",
        "full_name": "__m128i _mm_mask_loadu_epi8(__m128i src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_epi8",
        "full_name": "__m128i _mm_maskz_loadu_epi8(__mmask16 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_epi8",
        "full_name": "__m256i _mm256_loadu_epi8(void const * mem_addr);",
        "description": "Load 256-bits (composed of 32 packed 8-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_epi8",
        "full_name": "__m256i _mm256_mask_loadu_epi8(__m256i src, __mmask32 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_epi8",
        "full_name": "__m256i _mm256_maskz_loadu_epi8(__mmask32 k, void const * mem_addr);",
        "description": "Load packed 8-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_loadu_epi8",
        "full_name": "__m512i _mm512_loadu_epi8(void const * mem_addr);",
        "description": "Load 512-bits (composed of 64 packed 8-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_pd",
        "full_name": "__m128d _mm_mask_loadu_pd(__m128d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_pd",
        "full_name": "__m128d _mm_maskz_loadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_pd",
        "full_name": "__m256d _mm256_loadu_pd(double const * mem_addr);",
        "description": "Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_pd",
        "full_name": "__m256d _mm256_mask_loadu_pd(__m256d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_pd",
        "full_name": "__m256d _mm256_maskz_loadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_loadu_pd",
        "full_name": "__m512d _mm512_loadu_pd(double const * mem_addr);",
        "description": "Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into \"dst\". \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_loadu_pd",
        "full_name": "__m512d _mm512_mask_loadu_pd(__m512d src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_maskz_loadu_pd",
        "full_name": "__m512d _mm512_maskz_loadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_loadu_ps",
        "full_name": "__m128 _mm_mask_loadu_ps(__m128 src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskz_loadu_ps",
        "full_name": "__m128 _mm_maskz_loadu_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu_ps",
        "full_name": "__m256 _mm256_loadu_ps(float const * mem_addr);",
        "description": "Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into \"dst\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_loadu_ps",
        "full_name": "__m256 _mm256_mask_loadu_ps(__m256 src, __mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_maskz_loadu_ps",
        "full_name": "__m256 _mm256_maskz_loadu_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_loadu_ps",
        "full_name": "__m512 _mm512_loadu_ps(void const * mem_addr);",
        "description": "Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into \"dst\". \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_loadu_ps",
        "full_name": "__m512 _mm512_mask_loadu_ps(__m512 src, __mmask16 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_maskz_loadu_ps",
        "full_name": "__m512 _mm512_maskz_loadu_ps(__mmask16 k, void const * mem_addr);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_srlv_epi32",
        "full_name": "__m256i _mm256_mask_srlv_epi32(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srlv_epi32",
        "full_name": "__m256i _mm256_maskz_srlv_epi32(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_srlv_epi32",
        "full_name": "__m512i _mm512_mask_srlv_epi32(__m512i src, __mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srlv_epi32",
        "full_name": "__m512i _mm512_maskz_srlv_epi32(__mmask16 k, __m512i a, __m512i count);",
        "description": "Shift packed 32-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_srlv_epi64",
        "full_name": "__m128i _mm_srlv_epi64(__m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_srlv_epi64",
        "full_name": "__m128i _mm_mask_srlv_epi64(__m128i src, __mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_srlv_epi64",
        "full_name": "__m128i _mm_maskz_srlv_epi64(__mmask8 k, __m128i a, __m128i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_srlv_epi64",
        "full_name": "__m256i _mm256_srlv_epi64(__m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_srlv_epi64",
        "full_name": "__m256i _mm256_mask_srlv_epi64(__m256i src, __mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_srlv_epi64",
        "full_name": "__m256i _mm256_maskz_srlv_epi64(__mmask8 k, __m256i a, __m256i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_srlv_epi64",
        "full_name": "__m512i _mm512_srlv_epi64(__m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_srlv_epi64",
        "full_name": "__m512i _mm512_mask_srlv_epi64(__m512i src, __mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_srlv_epi64",
        "full_name": "__m512i _mm512_maskz_srlv_epi64(__mmask8 k, __m512i a, __m512i count);",
        "description": "Shift packed 64-bit integers in \"a\" right by the amount specified by the corresponding element in \"count\" while shifting in zeros, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_addcarry_u32",
        "full_name": "uint8_t _addcarry_u32(uint8_t c_in, uint32_t a, uint32_t b, uint32_t *out);",
        "description": "Add unsigned 32-bit integers \"a\" and \"b\" with unsigned 8-bit carry-in \"c_in\" (carry flag), and store the unsigned 32-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_addcarry_u64",
        "full_name": "uint8_t _addcarry_u64(uint8_t c_in, uint64_t a, uint64_t b, uint64_t *out);",
        "description": "Add unsigned 64-bit integers \"a\" and \"b\" with unsigned 8-bit carry-in \"c_in\" (carry flag), and store the unsigned 64-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_addcarryx_u32",
        "full_name": "uint8_t _addcarryx_u32(uint8_t c_in, uint32_t a, uint32_t b, uint32_t *out);",
        "description": "Add unsigned 32-bit integers \"a\" and \"b\" with unsigned 8-bit carry-in \"c_in\" (carry or overflow flag), and store the unsigned 32-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_addcarryx_u64",
        "full_name": "uint8_t _addcarryx_u64(uint8_t c_in, uint64_t a, uint64_t b, uint64_t *out);",
        "description": "Add unsigned 64-bit integers \"a\" and \"b\" with unsigned 8-bit carry-in \"c_in\" (carry or overflow flag), and store the unsigned 64-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_mm256_broadcast_pd",
        "full_name": "__m256d _mm256_broadcast_pd(__m128d const * mem_addr);",
        "description": "Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcast_ps",
        "full_name": "__m256 _mm256_broadcast_ps(__m128 const * mem_addr);",
        "description": "Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcast_sd",
        "full_name": "__m256d _mm256_broadcast_sd(double const * mem_addr);",
        "description": "Broadcast a double-precision (64-bit) floating-point element from memory to all elements of \"dst\"."
    },
    {
        "name": "_mm256_broadcast_ss",
        "full_name": "__m256 _mm256_broadcast_ss(float const * mem_addr);",
        "description": "Broadcast a single-precision (32-bit) floating-point element from memory to all elements of \"dst\"."
    },
    {
        "name": "_mm_cmp_epi16_mask",
        "full_name": "__mmask8 _mm_cmp_epi16_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epi16_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epi16_mask(__mmask8 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmp_epi16_mask",
        "full_name": "__mmask16 _mm256_cmp_epi16_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epi16_mask",
        "full_name": "__mmask16 _mm256_mask_cmp_epi16_mask(__mmask16 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmp_epi16_mask",
        "full_name": "__mmask32 _mm512_cmp_epi16_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epi32_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epi32_mask(__mmask8 k1, __m128i a, __m128i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmp_epi32_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_epi32_mask(__mmask8 k1, __m256i a, __m256i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_epi32_mask",
        "full_name": "__mmask16 _mm512_mask_cmp_epi32_mask(__mmask16 k1, __m512i a, __m512i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_epi64_mask",
        "full_name": "__mmask8 _mm_cmp_epi64_mask(__m128i a, __m128i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epi64_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epi64_mask(__mmask8 k1, __m128i a, __m128i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_loadu_epi64",
        "full_name": "__m512i _mm512_loadu_epi64(void const * mem_addr);",
        "description": "Load 512-bits (composed of 8 packed 64-bit integers) from memory into \"dst\".\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_cmp_epi64_mask",
        "full_name": "__mmask8 _mm256_cmp_epi64_mask(__m256i a, __m256i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epi64_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_epi64_mask(__mmask8 k1, __m256i a, __m256i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmp_epi64_mask",
        "full_name": "__mmask8 _mm512_cmp_epi64_mask(__m512i a, __m512i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmp_epi64_mask",
        "full_name": "__mmask8 _mm512_mask_cmp_epi64_mask(__mmask8 k1, __m512i a, __m512i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmp_epu32_mask",
        "full_name": "__mmask8 _mm256_cmp_epu32_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmp_epu64_mask",
        "full_name": "__mmask8 _mm_cmp_epu64_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_cmp_epu16_mask",
        "full_name": "__mmask32 _mm512_cmp_epu16_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_cmp_epi8_mask",
        "full_name": "__mmask16 _mm_cmp_epi8_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epi8_mask",
        "full_name": "__mmask16 _mm_mask_cmp_epi8_mask(__mmask16 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_epu16_mask",
        "full_name": "__mmask8 _mm_cmp_epu16_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmp_epu16_mask",
        "full_name": "__mmask16 _mm256_cmp_epu16_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epu16_mask",
        "full_name": "__mmask16 _mm256_mask_cmp_epu16_mask(__mmask16 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_epu16_mask",
        "full_name": "__mmask32 _mm512_mask_cmp_epu16_mask(__mmask32 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_epu32_mask",
        "full_name": "__mmask8 _mm_cmp_epu32_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epu32_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epu32_mask(__mmask8 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_cmp_epu32_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_epu32_mask(__mmask8 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmp_epu32_mask",
        "full_name": "__mmask16 _mm512_cmp_epu32_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_epu64_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epu64_mask(__mmask8 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmp_epu64_mask",
        "full_name": "__mmask8 _mm256_cmp_epu64_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epu64_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_epu64_mask(__mmask8 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmp_epu64_mask",
        "full_name": "__mmask8 _mm512_cmp_epu64_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmp_epu64_mask",
        "full_name": "__mmask8 _mm512_mask_cmp_epu64_mask(__mmask8 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 64-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_epu8_mask",
        "full_name": "__mmask16 _mm_cmp_epu8_mask(__m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmp_epu8_mask",
        "full_name": "__mmask32 _mm256_cmp_epu8_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epu8_mask",
        "full_name": "__mmask32 _mm256_mask_cmp_epu8_mask(__mmask32 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cmp_epu8_mask",
        "full_name": "__mmask64 _mm512_cmp_epu8_mask(__m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm512_mask_cmp_epu8_mask",
        "full_name": "__mmask64 _mm512_mask_cmp_epu8_mask(__mmask64 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_pd",
        "full_name": "__m128d _mm_cmp_pd(__m128d a, __m128d b, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmp_pd_mask",
        "full_name": "__mmask8 _mm_cmp_pd_mask(__m128d x, __m128d y, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_pd_mask",
        "full_name": "__mmask8 _mm_mask_cmp_pd_mask(__mmask8 k1, __m128d x, __m128d y, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmp_pd_mask",
        "full_name": "__mmask8 _mm256_cmp_pd_mask(__m256d x, __m256d y, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_pd_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_pd_mask(__mmask8 k1, __m256d x, __m256d y, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_pd_mask",
        "full_name": "__mmask8 _mm512_mask_cmp_pd_mask(__mmask8 k1, __m512d x, __m512d y, const int imm8);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_ps",
        "full_name": "__m128 _mm_cmp_ps(__m128 a, __m128 b, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_cmp_ps_mask",
        "full_name": "__mmask8 _mm_cmp_ps_mask(__m128 x, __m128 y, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_ps_mask",
        "full_name": "__mmask8 _mm_mask_cmp_ps_mask(__mmask8 k1, __m128 x, __m128 y, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_cmp_ps_mask",
        "full_name": "__mmask8 _mm256_cmp_ps_mask(__m256 x, __m256 y, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_ps_mask",
        "full_name": "__mmask8 _mm256_mask_cmp_ps_mask(__mmask8 k1, __m256 x, __m256 y, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmp_ps_mask(__mmask16 k1, __m512 x, __m512 y, const int imm8);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_sd",
        "full_name": "__m128d _mm_cmp_sd(__m128d a, __m128d b, const int imm8);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cmp_sd_mask",
        "full_name": "__mmask8 _mm_cmp_sd_mask(__m128d a, __m128d b, const int imm8);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_sd_mask",
        "full_name": "__mmask8 _mm_mask_cmp_sd_mask(__mmask8 k1, __m128d x, __m128d y, const int imm8);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\" using zeromask \"k1\" (the element is zeroed out when mask bit 0 is not set)."
    },
    {
        "name": "_mm_cmp_ss",
        "full_name": "__m128 _mm_cmp_ss(__m128 a, __m128 b, const int imm8);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_cmp_ss_mask",
        "full_name": "__mmask8 _mm_cmp_ss_mask(__m128 a, __m128 b, const int imm8);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\"."
    },
    {
        "name": "_mm_mask_cmp_ss_mask",
        "full_name": "__mmask8 _mm_mask_cmp_ss_mask(__mmask8 k1, __m128 x, __m128 y, const int imm8);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"x\" and \"y\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\" using zeromask \"k1\" (the element is zeroed out when mask bit 0 is not set)."
    },
    {
        "name": "_mm_mask_compressstoreu_epi32",
        "full_name": "void _mm_mask_compressstoreu_epi32(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_compressstoreu_epi32",
        "full_name": "void _mm256_mask_compressstoreu_epi32(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_compressstoreu_epi32",
        "full_name": "void _mm512_mask_compressstoreu_epi32(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Contiguously store the active 32-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_compressstoreu_epi64",
        "full_name": "void _mm_mask_compressstoreu_epi64(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_compressstoreu_epi64",
        "full_name": "void _mm256_mask_compressstoreu_epi64(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_compressstoreu_epi64",
        "full_name": "void _mm512_mask_compressstoreu_epi64(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Contiguously store the active 64-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_compressstoreu_pd",
        "full_name": "void _mm256_mask_compressstoreu_pd(void *base_addr, __mmask8 k, __m256d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_compressstoreu_pd",
        "full_name": "void _mm512_mask_compressstoreu_pd(void *base_addr, __mmask8 k, __m512d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_compressstoreu_ps",
        "full_name": "void _mm256_mask_compressstoreu_ps(void *base_addr, __mmask8 k, __m256 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_compressstoreu_ps",
        "full_name": "void _mm512_mask_compressstoreu_ps(void *base_addr, __mmask16 k, __m512 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_loadu2_m128i",
        "full_name": "__m256i _mm256_loadu2_m128i(__m128i const * hiaddr, __m128i const * loaddr);",
        "description": "Load two 128-bit values (composed of integer data) from memory, and combine them into a 256-bit value in \"dst\".\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_maskload_epi32",
        "full_name": "__m128i _mm_maskload_epi32(int const * mem_addr, __m128i mask);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using \"mask\" (elements are zeroed out when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm_maskload_epi64",
        "full_name": "__m128i _mm_maskload_epi64(__int64 const * mem_addr, __m128i mask);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using \"mask\" (elements are zeroed out when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm256_maskload_epi64",
        "full_name": "__m256i _mm256_maskload_epi64(__int64 const * mem_addr, __m256i mask);",
        "description": "Load packed 64-bit integers from memory into \"dst\" using \"mask\" (elements are zeroed out when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm_maskload_pd",
        "full_name": "__m128d _mm_maskload_pd(double const * mem_addr, __m128i mask);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using \"mask\" (elements are zeroed out when the high bit of the corresponding element is not set)."
    },
    {
        "name": "_mm256_maskload_pd",
        "full_name": "__m256d _mm256_maskload_pd(double const * mem_addr, __m256i mask);",
        "description": "Load packed double-precision (64-bit) floating-point elements from memory into \"dst\" using \"mask\" (elements are zeroed out when the high bit of the corresponding element is not set)."
    },
    {
        "name": "_mm_maskload_ps",
        "full_name": "__m128 _mm_maskload_ps(float const * mem_addr, __m128i mask);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using \"mask\" (elements are zeroed out when the high bit of the corresponding element is not set)."
    },
    {
        "name": "_mm256_maskload_ps",
        "full_name": "__m256 _mm256_maskload_ps(float const * mem_addr, __m256i mask);",
        "description": "Load packed single-precision (32-bit) floating-point elements from memory into \"dst\" using \"mask\" (elements are zeroed out when the high bit of the corresponding element is not set)."
    },
    {
        "name": "_mm_maskstore_epi32",
        "full_name": "void _mm_maskstore_epi32(int *mem_addr, __m128i mask, __m128i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm256_maskstore_epi32",
        "full_name": "void _mm256_maskstore_epi32(int *mem_addr, __m256i mask, __m256i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm256_maskstore_epi64",
        "full_name": "void _mm256_maskstore_epi64(__int64 *mem_addr, __m256i mask, __m256i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm_maskstore_epi64",
        "full_name": "void _mm_maskstore_epi64(__int64 *mem_addr, __m128i mask, __m128i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using \"mask\" (elements are not stored when the highest bit is not set in the corresponding element)."
    },
    {
        "name": "_mm_maskstore_pd",
        "full_name": "void _mm_maskstore_pd(double *mem_addr, __m128i mask, __m128d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using \"mask\"."
    },
    {
        "name": "_mm256_maskstore_pd",
        "full_name": "void _mm256_maskstore_pd(double *mem_addr, __m256i mask, __m256d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using \"mask\"."
    },
    {
        "name": "_mm_maskstore_ps",
        "full_name": "void _mm_maskstore_ps(float *mem_addr, __m128i mask, __m128 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using \"mask\"."
    },
    {
        "name": "_mm256_maskstore_ps",
        "full_name": "void _mm256_maskstore_ps(float *mem_addr, __m256i mask, __m256 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using \"mask\"."
    },
    {
        "name": "_mm256_mask_store_epi32",
        "full_name": "void _mm256_mask_store_epi32(void *mem_addr, __mmask8 k, __m256i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_store_epi32",
        "full_name": "void _mm512_mask_store_epi32(void *mem_addr, __mmask16 k, __m512i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_store_epi64",
        "full_name": "void _mm512_mask_store_epi64(void *mem_addr, __mmask8 k, __m512i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_store_mask8",
        "full_name": "void _store_mask8(__mmask8 *mem_addr, __mmask8 a);",
        "description": "Store 8-bit mask from \"a\" into memory."
    },
    {
        "name": "_store_mask16",
        "full_name": "void _store_mask16(__mmask16 *mem_addr, __mmask16 a);",
        "description": "Store 16-bit mask from \"a\" into memory."
    },
    {
        "name": "_store_mask32",
        "full_name": "void _store_mask32(__mmask32 *mem_addr, __mmask32 a);",
        "description": "Store 32-bit mask from \"a\" into memory."
    },
    {
        "name": "_store_mask64",
        "full_name": "void _store_mask64(__mmask64 *mem_addr, __mmask64 a);",
        "description": "Store 64-bit mask from \"a\" into memory."
    },
    {
        "name": "_mm_mask_store_pd",
        "full_name": "void _mm_mask_store_pd(double *mem_addr, __mmask8 k, __m128d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_store_pd",
        "full_name": "void _mm256_mask_store_pd(double *mem_addr, __mmask8 k, __m256d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_store_pd",
        "full_name": "void _mm512_mask_store_pd(double *mem_addr, __mmask8 k, __m512d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_store_ps",
        "full_name": "void _mm_mask_store_ps(float *mem_addr, __mmask8 k, __m128 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_cmp_epi16_mask",
        "full_name": "__mmask32 _mm512_mask_cmp_epi16_mask(__mmask32 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed signed 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cmp_epi32_mask",
        "full_name": "__mmask8 _mm_cmp_epi32_mask(__m128i a, __m128i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmp_epi32_mask",
        "full_name": "__mmask8 _mm256_cmp_epi32_mask(__m256i a, __m256i b, _MM_CMPINT_ENUM imm8);",
        "description": "Compare packed signed 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_cmp_epi8_mask",
        "full_name": "__mmask32 _mm256_cmp_epi8_mask(__m256i a, __m256i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\"."
    },
    {
        "name": "_mm256_mask_cmp_epi8_mask",
        "full_name": "__mmask32 _mm256_mask_cmp_epi8_mask(__mmask32 k1, __m256i a, __m256i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_epi8_mask",
        "full_name": "__mmask64 _mm512_mask_cmp_epi8_mask(__mmask64 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed signed 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmp_epu16_mask",
        "full_name": "__mmask8 _mm_mask_cmp_epu16_mask(__mmask8 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 16-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_cmp_epu32_mask",
        "full_name": "__mmask16 _mm512_mask_cmp_epu32_mask(__mmask16 k1, __m512i a, __m512i b, const int imm8);",
        "description": "Compare packed unsigned 32-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_cmp_epu8_mask",
        "full_name": "__mmask16 _mm_mask_cmp_epu8_mask(__mmask16 k1, __m128i a, __m128i b, const int imm8);",
        "description": "Compare packed unsigned 8-bit integers in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_loadu_epi32",
        "full_name": "__m512i _mm512_maskz_loadu_epi32(__mmask16 k, void const * mem_addr);",
        "description": "Load packed 32-bit integers from memory into \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_compressstoreu_pd",
        "full_name": "void _mm_mask_compressstoreu_pd(void *base_addr, __mmask8 k, __m128d a);",
        "description": "Contiguously store the active double-precision (64-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_compressstoreu_ps",
        "full_name": "void _mm_mask_compressstoreu_ps(void *base_addr, __mmask8 k, __m128 a);",
        "description": "Contiguously store the active single-precision (32-bit) floating-point elements in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_expandloadu_epi64",
        "full_name": "__m128i _mm_mask_expandloadu_epi64(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_epi64",
        "full_name": "__m128i _mm_maskz_expandloadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_epi64",
        "full_name": "__m256i _mm256_mask_expandloadu_epi64(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_epi64",
        "full_name": "__m256i _mm256_maskz_expandloadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_epi64",
        "full_name": "__m512i _mm512_mask_expandloadu_epi64(__m512i src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_epi64",
        "full_name": "__m512i _mm512_maskz_expandloadu_epi64(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 64-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expandloadu_pd",
        "full_name": "__m128d _mm_mask_expandloadu_pd(__m128d src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_pd",
        "full_name": "__m128d _mm_maskz_expandloadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_pd",
        "full_name": "__m256d _mm256_mask_expandloadu_pd(__m256d src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_pd",
        "full_name": "__m256d _mm256_maskz_expandloadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_pd",
        "full_name": "__m512d _mm512_mask_expandloadu_pd(__m512d src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_pd",
        "full_name": "__m512d _mm512_maskz_expandloadu_pd(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expandloadu_ps",
        "full_name": "__m128 _mm_mask_expandloadu_ps(__m128 src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_ps",
        "full_name": "__m128 _mm_maskz_expandloadu_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_ps",
        "full_name": "__m256 _mm256_mask_expandloadu_ps(__m256 src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_ps",
        "full_name": "__m256 _mm256_maskz_expandloadu_ps(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_ps",
        "full_name": "__m512 _mm512_mask_expandloadu_ps(__m512 src, __mmask16 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_ps",
        "full_name": "__m512 _mm512_maskz_expandloadu_ps(__mmask16 k, void const * mem_addr);",
        "description": "Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expandloadu_epi32",
        "full_name": "__m128i _mm_mask_expandloadu_epi32(__m128i src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_epi32",
        "full_name": "__m128i _mm_maskz_expandloadu_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_epi32",
        "full_name": "__m256i _mm256_mask_expandloadu_epi32(__m256i src, __mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_epi32",
        "full_name": "__m256i _mm256_maskz_expandloadu_epi32(__mmask8 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_epi32",
        "full_name": "__m512i _mm512_mask_expandloadu_epi32(__m512i src, __mmask16 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_epi32",
        "full_name": "__m512i _mm512_maskz_expandloadu_epi32(__mmask16 k, void const * mem_addr);",
        "description": "Load contiguous active 32-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_load_sd",
        "full_name": "__m128d _mm_mask_load_sd(__m128d src, __mmask8 k, void const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and set the upper element of \"dst\" to zero. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_sd",
        "full_name": "__m128d _mm_maskz_load_sd(__mmask8 k, void const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and set the upper element of \"dst\" to zero. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_load_ss",
        "full_name": "__m128 _mm_mask_load_ss(__m128 src, __mmask8 k, void const * mem_addr);",
        "description": "Load a single-precision (32-bit) floating-point element from memory into the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and set the upper elements of \"dst\" to zero. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_maskz_load_ss",
        "full_name": "__m128 _mm_maskz_load_ss(__mmask8 k, void const * mem_addr);",
        "description": "Load a single-precision (32-bit) floating-point element from memory into the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and set the upper elements of \"dst\" to zero. \"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_comi_round_sd",
        "full_name": "int _mm_comi_round_sd(__m128d a, __m128d b, const int imm8, const int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and return the boolean result (0 or 1). [sae_note]"
    },
    {
        "name": "_mm_comi_round_ss",
        "full_name": "int _mm_comi_round_ss(__m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and return the boolean result (0 or 1). [sae_note]"
    },
    {
        "name": "_mm_loadh_pd",
        "full_name": "__m128d _mm_loadh_pd(__m128d a, double const * mem_addr);",
        "description": "Load a double-precision (64-bit) floating-point element from memory into the upper element of \"dst\", and copy the lower element from \"a\" to \"dst\". \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_loadbe_i16",
        "full_name": "short _loadbe_i16(void const * ptr);",
        "description": "Load 16 bits from memory, perform a byte swap operation, and store the result in \"dst\"."
    },
    {
        "name": "_loadbe_i32",
        "full_name": "int _loadbe_i32(void const * ptr);",
        "description": "Load 32 bits from memory, perform a byte swap operation, and store the result in \"dst\"."
    },
    {
        "name": "_loadbe_i64",
        "full_name": "__int64 _loadbe_i64(void const * ptr);",
        "description": "Load 64 bits from memory, perform a byte swap operation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_loadu_si16",
        "full_name": "__m128i _mm_loadu_si16(void const * mem_addr);",
        "description": "Load unaligned 16-bit integer from memory into the first element of \"dst\"."
    },
    {
        "name": "_mm_loadu_si32",
        "full_name": "__m128i _mm_loadu_si32(void const * mem_addr);",
        "description": "Load unaligned 32-bit integer from memory into the first element of \"dst\"."
    },
    {
        "name": "_mm_loadu_si64",
        "full_name": "__m128i _mm_loadu_si64(void const * mem_addr);",
        "description": "Load unaligned 64-bit integer from memory into the first element of \"dst\"."
    },
    {
        "name": "_mm512_cmp_round_ps_mask",
        "full_name": "__mmask16 _mm512_cmp_round_ps_mask(__m512 a, __m512 b, const int imm8, const int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\". [sae_note]"
    },
    {
        "name": "_mm512_mask_cmp_round_ps_mask",
        "full_name": "__mmask16 _mm512_mask_cmp_round_ps_mask(__mmask16 k1, __m512 a, __m512 b, const int imm8, const int sae);",
        "description": "Compare packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set). [sae_note]"
    },
    {
        "name": "_mm_cmp_round_ss_mask",
        "full_name": "__mmask8 _mm_cmp_round_ss_mask(__m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\". [sae_note]"
    },
    {
        "name": "_mm_mask_cmp_round_ss_mask",
        "full_name": "__mmask8 _mm_mask_cmp_round_ss_mask(__mmask8 k1, __m128 a, __m128 b, const int imm8, const int sae);",
        "description": "Compare the lower single-precision (32-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\" using zeromask \"k1\" (the element is zeroed out when mask bit 0 is not set). [sae_note]"
    },
    {
        "name": "_mm_mask_store_epi32",
        "full_name": "void _mm_mask_store_epi32(void *mem_addr, __mmask8 k, __m128i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_store_epi64",
        "full_name": "void _mm_mask_store_epi64(void *mem_addr, __mmask8 k, __m128i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_mask_store_epi64",
        "full_name": "void _mm256_mask_store_epi64(void *mem_addr, __mmask8 k, __m256i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_i32loextgather_pd",
        "full_name": "__m512d _mm512_i32loextgather_pd(__m512i vindex, void const * base_addr, _MM_UPCONV_PD_ENUM conv, int scale, int hint);",
        "description": "Up-converts 8 double-precision (64-bit) floating-point elements in memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in the lower half of \"vindex\" scaled by \"scale\" using \"conv\" to 64-bit floating-point elements and stores them in \"dst\"."
    },
    {
        "name": "_mm512_i32logather_epi64",
        "full_name": "__m512i _mm512_i32logather_epi64(__m512i vindex, void const * base_addr, int scale);",
        "description": "Loads 8 64-bit integer elements from memory starting at location \"base_addr\" at packed 32-bit integer indices stored in the lower half of \"vindex\" scaled by \"scale\" and stores them in \"dst\"."
    },
    {
        "name": "_mm512_mask_i32logather_epi64",
        "full_name": "__m512i _mm512_mask_i32logather_epi64(__m512i src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Loads 8 64-bit integer elements from memory starting at location \"base_addr\" at packed 32-bit integer indices stored in the lower half of \"vindex\" scaled by \"scale\" and stores them in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_i32logather_pd",
        "full_name": "__m512d _mm512_i32logather_pd(__m512i vindex, void const * base_addr, int scale);",
        "description": "Loads 8 double-precision (64-bit) floating-point elements stored at memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in the lower half of \"vindex\" scaled by \"scale\" and stores them in \"dst\"."
    },
    {
        "name": "_mm512_mask_i32logather_pd",
        "full_name": "__m512d _mm512_mask_i32logather_pd(__m512d src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Loads 8 double-precision (64-bit) floating-point elements from memory starting at location \"base_addr\" at packed 32-bit integer indices stored in the lower half of \"vindex\" scaled by \"scale\" into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_i32loscatter_epi64",
        "full_name": "void _mm512_i32loscatter_epi64(void *base_addr, __m512i vindex, __m512i a, int scale);",
        "description": "Stores 8 packed 64-bit integer elements located in \"a\" to memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\"."
    },
    {
        "name": "_mm512_mask_i32loscatter_epi64",
        "full_name": "void _mm512_mask_i32loscatter_epi64(void *base_addr, __mmask8 k, __m512i vindex, __m512i a, int scale);",
        "description": "Stores 8 packed 64-bit integer elements located in \"a\" to memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\" using writemask \"k\" (elements whose corresponding mask bit is not set are not written to memory)."
    },
    {
        "name": "_mm512_i32loscatter_pd",
        "full_name": "void _mm512_i32loscatter_pd(void *base_addr, __m512i vindex, __m512d a, int scale);",
        "description": "Stores 8 packed double-precision (64-bit) floating-point elements in \"a\" to memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\"."
    },
    {
        "name": "_mm512_mask_i32loscatter_pd",
        "full_name": "void _mm512_mask_i32loscatter_pd(void *base_addr, __mmask8 k, __m512i vindex, __m512d a, int scale);",
        "description": "Stores 8 packed double-precision (64-bit) floating-point elements in \"a\" to memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\". Only those elements whose corresponding mask bit is set in writemask \"k\" are written to memory."
    },
    {
        "name": "_mm_mask_packs_epi16",
        "full_name": "__m128i _mm_mask_packs_epi16(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_packs_epi16",
        "full_name": "__m128i _mm_maskz_packs_epi16(__mmask16 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_packs_epi32",
        "full_name": "__m128i _mm_mask_packs_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_packs_epi32",
        "full_name": "__m128i _mm_maskz_packs_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_packs_epi16",
        "full_name": "__m256i _mm256_packs_epi16(__m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_packs_epi16",
        "full_name": "__m256i _mm256_mask_packs_epi16(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_packs_epi16",
        "full_name": "__m256i _mm256_maskz_packs_epi16(__mmask32 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_packs_epi32",
        "full_name": "__m256i _mm256_mask_packs_epi32(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_packs_epi32",
        "full_name": "__m256i _mm256_maskz_packs_epi32(__mmask16 k, __m256i a, __m256i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_packs_epi16",
        "full_name": "__m512i _mm512_packs_epi16(__m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_packs_epi16",
        "full_name": "__m512i _mm512_mask_packs_epi16(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_packs_epi16",
        "full_name": "__m512i _mm512_maskz_packs_epi16(__mmask64 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_packs_epi32",
        "full_name": "__m512i _mm512_mask_packs_epi32(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_packs_epi32",
        "full_name": "__m512i _mm512_maskz_packs_epi32(__mmask32 k, __m512i a, __m512i b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_packs_pi16",
        "full_name": "__m64 _mm_packs_pi16(__m64 a, __m64 b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_packs_pi32",
        "full_name": "__m64 _mm_packs_pi32(__m64 a, __m64 b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_packs_pu16",
        "full_name": "__m64 _mm_packs_pu16(__m64 a, __m64 b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_packssdw",
        "full_name": "__m64 _m_packssdw(__m64 a, __m64 b);",
        "description": "Convert packed signed 32-bit integers from \"a\" and \"b\" to packed 16-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_packsswb",
        "full_name": "__m64 _m_packsswb(__m64 a, __m64 b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using signed saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_packuswb",
        "full_name": "__m64 _m_packuswb(__m64 a, __m64 b);",
        "description": "Convert packed signed 16-bit integers from \"a\" and \"b\" to packed 8-bit integers using unsigned saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_paddsb",
        "full_name": "__m64 _m_paddsb(__m64 a, __m64 b);",
        "description": "Add packed signed 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_paddsw",
        "full_name": "__m64 _m_paddsw(__m64 a, __m64 b);",
        "description": "Add packed signed 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_paddusb",
        "full_name": "__m64 _m_paddusb(__m64 a, __m64 b);",
        "description": "Add packed unsigned 8-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_paddusw",
        "full_name": "__m64 _m_paddusw(__m64 a, __m64 b);",
        "description": "Add packed unsigned 16-bit integers in \"a\" and \"b\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_permute_ps",
        "full_name": "__m128 _mm_permute_ps(__m128 a, int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permute_ps",
        "full_name": "__m128 _mm_mask_permute_ps(__m128 src, __mmask8 k, __m128 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permute_ps",
        "full_name": "__m128 _mm_maskz_permute_ps(__mmask8 k, __m128 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permute_ps",
        "full_name": "__m256 _mm256_permute_ps(__m256 a, int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permute_ps",
        "full_name": "__m256 _mm256_mask_permute_ps(__m256 src, __mmask8 k, __m256 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permute_ps",
        "full_name": "__m256 _mm256_maskz_permute_ps(__mmask8 k, __m256 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permute_ps",
        "full_name": "__m512 _mm512_permute_ps(__m512 a, int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permute_ps",
        "full_name": "__m512 _mm512_mask_permute_ps(__m512 src, __mmask16 k, __m512 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permute_ps",
        "full_name": "__m512 _mm512_maskz_permute_ps(__mmask16 k, __m512 a, const int imm8);",
        "description": "Shuffle single-precision (32-bit) floating-point elements in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutex_epi64",
        "full_name": "__m256i _mm256_permutex_epi64(__m256i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex_epi64",
        "full_name": "__m256i _mm256_mask_permutex_epi64(__m256i src, __mmask8 k, __m256i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex_epi64",
        "full_name": "__m256i _mm256_maskz_permutex_epi64(__mmask8 k, __m256i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex_epi64",
        "full_name": "__m512i _mm512_permutex_epi64(__m512i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex_epi64",
        "full_name": "__m512i _mm512_mask_permutex_epi64(__m512i src, __mmask8 k, __m512i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex_epi64",
        "full_name": "__m512i _mm512_maskz_permutex_epi64(__mmask8 k, __m512i a, const int imm8);",
        "description": "Shuffle 64-bit integers in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutex_pd",
        "full_name": "__m256d _mm256_permutex_pd(__m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex_pd",
        "full_name": "__m256d _mm256_mask_permutex_pd(__m256d src, __mmask8 k, __m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex_pd",
        "full_name": "__m256d _mm256_maskz_permutex_pd(__mmask8 k, __m256d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" across lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex_pd",
        "full_name": "__m512d _mm512_permutex_pd(__m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex_pd",
        "full_name": "__m512d _mm512_mask_permutex_pd(__m512d src, __mmask8 k, __m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex_pd",
        "full_name": "__m512d _mm512_maskz_permutex_pd(__mmask8 k, __m512d a, const int imm8);",
        "description": "Shuffle double-precision (64-bit) floating-point elements in \"a\" within 256-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_m_psadbw",
        "full_name": "__m64 _m_psadbw(__m64 a, __m64 b);",
        "description": "Compute the absolute differences of packed unsigned 8-bit integers in \"a\" and \"b\", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of \"dst\"."
    },
    {
        "name": "_m_pshufw",
        "full_name": "__m64 _m_pshufw(__m64 a, int imm8);",
        "description": "Shuffle 16-bit integers in \"a\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_m_psubusb",
        "full_name": "__m64 _m_psubusb(__m64 a, __m64 b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_psubusw",
        "full_name": "__m64 _m_psubusw(__m64 a, __m64 b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_m_punpckhbw",
        "full_name": "__m64 _m_punpckhbw(__m64 a, __m64 b);",
        "description": "Unpack and interleave 8-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_punpckhwd",
        "full_name": "__m64 _m_punpckhwd(__m64 a, __m64 b);",
        "description": "Unpack and interleave 16-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_punpcklbw",
        "full_name": "__m64 _m_punpcklbw(__m64 a, __m64 b);",
        "description": "Unpack and interleave 8-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_m_punpcklwd",
        "full_name": "__m64 _m_punpcklwd(__m64 a, __m64 b);",
        "description": "Unpack and interleave 16-bit integers from the low half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_rcp14_ps",
        "full_name": "__m128 _mm_mask_rcp14_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_maskz_rcp14_ps",
        "full_name": "__m128 _mm_maskz_rcp14_ps(__mmask8 k, __m128 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_rcp14_ss",
        "full_name": "__m128 _mm_rcp14_ss(__m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_mask_rcp14_ss",
        "full_name": "__m128 _mm_mask_rcp14_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_maskz_rcp14_ss",
        "full_name": "__m128 _mm_maskz_rcp14_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_rcp14_ps",
        "full_name": "__m256 _mm256_rcp14_ps(__m256 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_mask_rcp14_ps",
        "full_name": "__m256 _mm256_mask_rcp14_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_maskz_rcp14_ps",
        "full_name": "__m256 _mm256_maskz_rcp14_ps(__mmask8 k, __m256 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_rcp14_ps",
        "full_name": "__m512 _mm512_rcp14_ps(__m512 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_mask_rcp14_ps",
        "full_name": "__m512 _mm512_mask_rcp14_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_maskz_rcp14_ps",
        "full_name": "__m512 _mm512_maskz_rcp14_ps(__mmask16 k, __m512 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_mask_rsqrt14_ps",
        "full_name": "__m128 _mm_mask_rsqrt14_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_maskz_rsqrt14_ps",
        "full_name": "__m128 _mm_maskz_rsqrt14_ps(__mmask8 k, __m128 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_mask_rsqrt14_ps",
        "full_name": "__m256 _mm256_mask_rsqrt14_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_maskz_rsqrt14_ps",
        "full_name": "__m256 _mm256_maskz_rsqrt14_ps(__mmask8 k, __m256 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_rsqrt14_ps",
        "full_name": "__m512 _mm512_rsqrt14_ps(__m512 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_mask_rsqrt14_ps",
        "full_name": "__m512 _mm512_mask_rsqrt14_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm512_maskz_rsqrt14_ps",
        "full_name": "__m512 _mm512_maskz_rsqrt14_ps(__mmask16 k, __m512 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_rsqrt14_ss",
        "full_name": "__m128 _mm_rsqrt14_ss(__m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_mask_rsqrt14_ss",
        "full_name": "__m128 _mm_mask_rsqrt14_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_maskz_rsqrt14_ss",
        "full_name": "__m128 _mm_maskz_rsqrt14_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_sad_pu8",
        "full_name": "__m64 _mm_sad_pu8(__m64 a, __m64 b);",
        "description": "Compute the absolute differences of packed unsigned 8-bit integers in \"a\" and \"b\", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of \"dst\"."
    },
    {
        "name": "_mm_sad_epu8",
        "full_name": "__m128i _mm_sad_epu8(__m128i a, __m128i b);",
        "description": "Compute the absolute differences of packed unsigned 8-bit integers in \"a\" and \"b\", then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in \"dst\"."
    },
    {
        "name": "_mm256_sad_epu8",
        "full_name": "__m256i _mm256_sad_epu8(__m256i a, __m256i b);",
        "description": "Compute the absolute differences of packed unsigned 8-bit integers in \"a\" and \"b\", then horizontally sum each consecutive 8 differences to produce four unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in \"dst\"."
    },
    {
        "name": "_mm512_sad_epu8",
        "full_name": "__m512i _mm512_sad_epu8(__m512i a, __m512i b);",
        "description": "Compute the absolute differences of packed unsigned 8-bit integers in \"a\" and \"b\", then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in \"dst\"."
    },
    {
        "name": "_mm_mask_i32scatter_ps",
        "full_name": "void _mm_mask_i32scatter_ps(void *base_addr, __mmask8 k, __m128i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32scatter_ps",
        "full_name": "void _mm256_i32scatter_ps(void *base_addr, __m256i vindex, __m256 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32scatter_ps",
        "full_name": "void _mm256_mask_i32scatter_ps(void *base_addr, __mmask8 k, __m256i vindex, __m256 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32scatter_ps",
        "full_name": "void _mm512_i32scatter_ps(void *base_addr, __m512i vindex, __m512 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32scatter_ps",
        "full_name": "void _mm512_mask_i32scatter_ps(void *base_addr, __mmask16 k, __m512i vindex, __m512 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64gather_epi32",
        "full_name": "__m128i _mm_i64gather_epi32(int const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64gather_epi32",
        "full_name": "__m128i _mm_mask_i64gather_epi32(__m128i src, int const * base_addr, __m128i vindex, __m128i mask, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i64gather_epi32",
        "full_name": "__m128i _mm_mmask_i64gather_epi32(__m128i src, __mmask8 k, __m128i vindex, int const * base_addr, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64gather_epi32",
        "full_name": "__m128i _mm256_i64gather_epi32(int const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64gather_epi32",
        "full_name": "__m128i _mm256_mask_i64gather_epi32(__m128i src, int const * base_addr, __m256i vindex, __m128i mask, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i64gather_epi32",
        "full_name": "__m128i _mm256_mmask_i64gather_epi32(__m128i src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64gather_epi32",
        "full_name": "__m256i _mm512_i64gather_epi32(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64gather_epi32",
        "full_name": "__m256i _mm512_mask_i64gather_epi32(__m256i src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64gather_epi64",
        "full_name": "__m128i _mm_i64gather_epi64(__int64 const * base_addr, __m128i vindex, const int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64gather_epi64",
        "full_name": "__m128i _mm_mask_i64gather_epi64(__m128i src, __int64 const * base_addr, __m128i vindex, __m128i mask, const int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using \"mask\" (elements are copied from \"src\" when the highest bit is not set in the corresponding element). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mmask_i64gather_epi64",
        "full_name": "__m128i _mm_mmask_i64gather_epi64(__m128i src, __mmask8 k, __m128i vindex, void const * base_addr, const int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64gather_epi64",
        "full_name": "__m256i _mm256_i64gather_epi64(__int64 const * base_addr, __m256i vindex, const int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mmask_i64gather_epi64",
        "full_name": "__m256i _mm256_mmask_i64gather_epi64(__m256i src, __mmask8 k, __m256i vindex, void const * base_addr, const int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64gather_epi64",
        "full_name": "__m512i _mm512_i64gather_epi64(__m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\". \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64gather_epi64",
        "full_name": "__m512i _mm512_mask_i64gather_epi64(__m512i src, __mmask8 k, __m512i vindex, void const * base_addr, int scale);",
        "description": "Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged into \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64scatter_epi32",
        "full_name": "void _mm_i64scatter_epi32(void *base_addr, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64scatter_epi32",
        "full_name": "void _mm_mask_i64scatter_epi32(void *base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64scatter_epi64",
        "full_name": "void _mm_i64scatter_epi64(void *base_addr, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64scatter_epi64",
        "full_name": "void _mm_mask_i64scatter_epi64(void *base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64scatter_pd",
        "full_name": "void _mm_i64scatter_pd(void *base_addr, __m128i vindex, __m128d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64scatter_pd",
        "full_name": "void _mm_mask_i64scatter_pd(void *base_addr, __mmask8 k, __m128i vindex, __m128d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i64scatter_ps",
        "full_name": "void _mm_i64scatter_ps(void *base_addr, __m128i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i64scatter_ps",
        "full_name": "void _mm_mask_i64scatter_ps(void *base_addr, __mmask8 k, __m128i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64scatter_epi32",
        "full_name": "void _mm256_i64scatter_epi32(void *base_addr, __m256i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64scatter_epi32",
        "full_name": "void _mm256_mask_i64scatter_epi32(void *base_addr, __mmask8 k, __m256i vindex, __m128i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64scatter_epi64",
        "full_name": "void _mm256_i64scatter_epi64(void *base_addr, __m256i vindex, __m256i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64scatter_epi64",
        "full_name": "void _mm256_mask_i64scatter_epi64(void *base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64scatter_pd",
        "full_name": "void _mm256_i64scatter_pd(void *base_addr, __m256i vindex, __m256d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64scatter_pd",
        "full_name": "void _mm256_mask_i64scatter_pd(void *base_addr, __mmask8 k, __m256i vindex, __m256d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i64scatter_ps",
        "full_name": "void _mm256_i64scatter_ps(void *base_addr, __m256i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i64scatter_ps",
        "full_name": "void _mm256_mask_i64scatter_ps(void *base_addr, __mmask8 k, __m256i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64scatter_epi32",
        "full_name": "void _mm512_i64scatter_epi32(void *base_addr, __m512i vindex, __m256i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64scatter_epi32",
        "full_name": "void _mm512_mask_i64scatter_epi32(void *base_addr, __mmask8 k, __m512i vindex, __m256i a, const int scale);",
        "description": "Scatter 32-bit integers from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64scatter_epi64",
        "full_name": "void _mm512_i64scatter_epi64(void *base_addr, __m512i vindex, __m512i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64scatter_epi64",
        "full_name": "void _mm512_mask_i64scatter_epi64(void *base_addr, __mmask8 k, __m512i vindex, __m512i a, const int scale);",
        "description": "Scatter 64-bit integers from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64scatter_pd",
        "full_name": "void _mm512_i64scatter_pd(void *base_addr, __m512i vindex, __m512d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64scatter_pd",
        "full_name": "void _mm512_mask_i64scatter_pd(void *base_addr, __mmask8 k, __m512i vindex, __m512d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 64-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i64scatter_ps",
        "full_name": "void _mm512_i64scatter_ps(void *base_addr, __m512i vindex, __m256 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i64scatter_ps",
        "full_name": "void _mm512_mask_i64scatter_ps(void *base_addr, __mmask8 k, __m512i vindex, __m256 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 64-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_idivrem_epi32",
        "full_name": "__m128i _mm_idivrem_epi32(__m128i *mem_addr, __m128i a, __m128i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", store the truncated results in \"dst\", and store the remainders as packed 32-bit integers into memory at \"mem_addr\"."
    },
    {
        "name": "_mm256_idivrem_epi32",
        "full_name": "__m256i _mm256_idivrem_epi32(__m256i *mem_addr, __m256i a, __m256i b);",
        "description": "Divide packed 32-bit integers in \"a\" by packed elements in \"b\", store the truncated results in \"dst\", and store the remainders as packed 32-bit integers into memory at \"mem_addr\"."
    },
    {
        "name": "_mm_maddubs_pi16",
        "full_name": "__m64 _mm_maddubs_pi16(__m64 a, __m64 b);",
        "description": "Vertically multiply each unsigned 8-bit integer from \"a\" with the corresponding signed 8-bit integer from \"b\", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in \"dst\"."
    },
    {
        "name": "_mm_roundscale_ss",
        "full_name": "__m128 _mm_roundscale_ss(__m128 a, __m128 b, const int imm8);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_mask_roundscale_ss",
        "full_name": "__m128 _mm_mask_roundscale_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, const int imm8);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_maskz_roundscale_ss",
        "full_name": "__m128 _mm_maskz_roundscale_ss(__mmask8 k, __m128 a, __m128 b, const int imm8);",
        "description": "Round the lower single-precision (32-bit) floating-point element in \"b\" to the number of fraction bits specified by \"imm8\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". [round_imm_note]"
    },
    {
        "name": "_mm_scalef_pd",
        "full_name": "__m128d _mm_scalef_pd(__m128d a, __m128d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_scalef_pd",
        "full_name": "__m128d _mm_mask_scalef_pd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_scalef_pd",
        "full_name": "__m128d _mm_maskz_scalef_pd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_scalef_ps",
        "full_name": "__m128 _mm_scalef_ps(__m128 a, __m128 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_scalef_ps",
        "full_name": "__m128 _mm_mask_scalef_ps(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_scalef_ps",
        "full_name": "__m128 _mm_maskz_scalef_ps(__mmask8 k, __m128 a, __m128 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_scalef_round_sd",
        "full_name": "__m128d _mm_scalef_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_scalef_round_sd",
        "full_name": "__m128d _mm_mask_scalef_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_scalef_round_sd",
        "full_name": "__m128d _mm_maskz_scalef_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_scalef_round_ss",
        "full_name": "__m128 _mm_scalef_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_scalef_round_ss",
        "full_name": "__m128 _mm_mask_scalef_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_scalef_round_ss",
        "full_name": "__m128 _mm_maskz_scalef_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the lower element from \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm256_scalef_pd",
        "full_name": "__m256d _mm256_scalef_pd(__m256d a, __m256d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_scalef_pd",
        "full_name": "__m256d _mm256_mask_scalef_pd(__m256d src, __mmask8 k, __m256d a, __m256d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_scalef_pd",
        "full_name": "__m256d _mm256_maskz_scalef_pd(__mmask8 k, __m256d a, __m256d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_scalef_ps",
        "full_name": "__m256 _mm256_scalef_ps(__m256 a, __m256 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_scalef_ps",
        "full_name": "__m256 _mm256_mask_scalef_ps(__m256 src, __mmask8 k, __m256 a, __m256 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_scalef_ps",
        "full_name": "__m256 _mm256_maskz_scalef_ps(__mmask8 k, __m256 a, __m256 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_scalef_pd",
        "full_name": "__m512d _mm512_scalef_pd(__m512d a, __m512d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_scalef_pd",
        "full_name": "__m512d _mm512_mask_scalef_pd(__m512d src, __mmask8 k, __m512d a, __m512d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_scalef_pd",
        "full_name": "__m512d _mm512_maskz_scalef_pd(__mmask8 k, __m512d a, __m512d b);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_scalef_ps",
        "full_name": "__m512 _mm512_scalef_ps(__m512 a, __m512 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_scalef_ps",
        "full_name": "__m512 _mm512_mask_scalef_ps(__m512 src, __mmask16 k, __m512 a, __m512 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_scalef_ps",
        "full_name": "__m512 _mm512_maskz_scalef_ps(__mmask16 k, __m512 a, __m512 b);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_scalef_round_pd",
        "full_name": "__m512d _mm512_scalef_round_pd(__m512d a, __m512d b, int rounding);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_scalef_round_pd",
        "full_name": "__m512d _mm512_mask_scalef_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_scalef_round_pd",
        "full_name": "__m512d _mm512_maskz_scalef_round_pd(__mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Scale the packed double-precision (64-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_scalef_round_ps",
        "full_name": "__m512 _mm512_scalef_round_ps(__m512 a, __m512 b, int rounding);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_scalef_round_ps",
        "full_name": "__m512 _mm512_mask_scalef_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_scalef_round_ps",
        "full_name": "__m512 _mm512_maskz_scalef_round_ps(__mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Scale the packed single-precision (32-bit) floating-point elements in \"a\" using values from \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm_log_pd",
        "full_name": "__m128d _mm_log_pd(__m128d a);",
        "description": "Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log_ps",
        "full_name": "__m128 _mm_log_ps(__m128 a);",
        "description": "Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log10_pd",
        "full_name": "__m128d _mm_log10_pd(__m128d a);",
        "description": "Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log10_ps",
        "full_name": "__m128 _mm_log10_ps(__m128 a);",
        "description": "Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log1p_pd",
        "full_name": "__m128d _mm_log1p_pd(__m128d a);",
        "description": "Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log1p_ps",
        "full_name": "__m128 _mm_log1p_ps(__m128 a);",
        "description": "Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log2_pd",
        "full_name": "__m128d _mm_log2_pd(__m128d a);",
        "description": "Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_log2_ps",
        "full_name": "__m128 _mm_log2_ps(__m128 a);",
        "description": "Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log_pd",
        "full_name": "__m256d _mm256_log_pd(__m256d a);",
        "description": "Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log_ps",
        "full_name": "__m256 _mm256_log_ps(__m256 a);",
        "description": "Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log10_pd",
        "full_name": "__m256d _mm256_log10_pd(__m256d a);",
        "description": "Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log10_ps",
        "full_name": "__m256 _mm256_log10_ps(__m256 a);",
        "description": "Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log1p_pd",
        "full_name": "__m256d _mm256_log1p_pd(__m256d a);",
        "description": "Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log1p_ps",
        "full_name": "__m256 _mm256_log1p_ps(__m256 a);",
        "description": "Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log2_pd",
        "full_name": "__m256d _mm256_log2_pd(__m256d a);",
        "description": "Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_log2_ps",
        "full_name": "__m256 _mm256_log2_ps(__m256 a);",
        "description": "Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_log_pd",
        "full_name": "__m512d _mm512_log_pd(__m512d a);",
        "description": "Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log_pd",
        "full_name": "__m512d _mm512_mask_log_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the natural logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log_ps",
        "full_name": "__m512 _mm512_log_ps(__m512 a);",
        "description": "Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log_ps",
        "full_name": "__m512 _mm512_mask_log_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the natural logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log10_pd",
        "full_name": "__m512d _mm512_log10_pd(__m512d a);",
        "description": "Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log10_pd",
        "full_name": "__m512d _mm512_mask_log10_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the base-10 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log10_ps",
        "full_name": "__m512 _mm512_log10_ps(__m512 a);",
        "description": "Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log10_ps",
        "full_name": "__m512 _mm512_mask_log10_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the base-10 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log1p_pd",
        "full_name": "__m512d _mm512_log1p_pd(__m512d a);",
        "description": "Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log1p_pd",
        "full_name": "__m512d _mm512_mask_log1p_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the natural logarithm of one plus packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log1p_ps",
        "full_name": "__m512 _mm512_log1p_ps(__m512 a);",
        "description": "Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log1p_ps",
        "full_name": "__m512 _mm512_mask_log1p_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the natural logarithm of one plus packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log2_pd",
        "full_name": "__m512d _mm512_log2_pd(__m512d a);",
        "description": "Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log2_pd",
        "full_name": "__m512d _mm512_mask_log2_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the base-2 logarithm of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_log2_ps",
        "full_name": "__m512 _mm512_log2_ps(__m512 a);",
        "description": "Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_log2_ps",
        "full_name": "__m512 _mm512_mask_log2_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the base-2 logarithm of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_broadcast_ss",
        "full_name": "__m128 _mm_broadcast_ss(float const * mem_addr);",
        "description": "Broadcast a single-precision (32-bit) floating-point element from memory to all elements of \"dst\"."
    },
    {
        "name": "_mm256_loadu2_m128",
        "full_name": "__m256 _mm256_loadu2_m128(float const * hiaddr, float const * loaddr);",
        "description": "Load two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combine them into a 256-bit value in \"dst\".\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_loadu2_m128d",
        "full_name": "__m256d _mm256_loadu2_m128d(double const * hiaddr, double const * loaddr);",
        "description": "Load two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combine them into a 256-bit value in \"dst\".\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_rdtsc",
        "full_name": "__int64 _rdtsc(void);",
        "description": "Copy the current 64-bit value of the processor's time-stamp counter into \"dst\"."
    },
    {
        "name": "_mm_cmp_round_sd_mask",
        "full_name": "__mmask8 _mm_cmp_round_sd_mask(__m128d a, __m128d b, const int imm8, const int sae);",
        "description": "Compare the lower double-precision (64-bit) floating-point element in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the result in mask vector \"k\". [sae_note]"
    },
    {
        "name": "_mm512_cmp_round_pd_mask",
        "full_name": "__mmask8 _mm512_cmp_round_pd_mask(__m512d a, __m512d b, const int imm8, const int sae);",
        "description": "Compare packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" based on the comparison operand specified by \"imm8\", and store the results in mask vector \"k\". [sae_note]"
    },
    {
        "name": "_mm_mask_range_round_sd",
        "full_name": "__m128d _mm_mask_range_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm_mask_range_round_ss",
        "full_name": "__m128 _mm_mask_range_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm_maskz_range_round_ss",
        "full_name": "__m128 _mm_maskz_range_round_ss(__mmask8 k, __m128 a, __m128 b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm256_range_pd",
        "full_name": "__m256d _mm256_range_pd(__m256d a, __m256d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_mask_range_pd",
        "full_name": "__m256d _mm256_mask_range_pd(__m256d src, __mmask8 k, __m256d a, __m256d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm256_maskz_range_pd",
        "full_name": "__m256d _mm256_maskz_range_pd(__mmask8 k, __m256d a, __m256d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_range_pd",
        "full_name": "__m512d _mm512_range_pd(__m512d a, __m512d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_mask_range_pd",
        "full_name": "__m512d _mm512_mask_range_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_mm512_maskz_range_pd",
        "full_name": "__m512d _mm512_maskz_range_pd(__mmask8 k, __m512d a, __m512d b, int imm8);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit."
    },
    {
        "name": "_lrotl",
        "full_name": "unsigned long _lrotl(unsigned long a, int shift);",
        "description": "Shift the bits of unsigned long integer \"a\" left by the number of bits specified in \"shift\", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_lrotr",
        "full_name": "unsigned long _lrotr(unsigned long a, int shift);",
        "description": "Shift the bits of unsigned long integer \"a\" right by the number of bits specified in \"shift\", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_mm_scalef_sd",
        "full_name": "__m128d _mm_scalef_sd(__m128d a, __m128d b);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_scalef_sd",
        "full_name": "__m128d _mm_mask_scalef_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_scalef_sd",
        "full_name": "__m128d _mm_maskz_scalef_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Scale the lower double-precision (64-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_scalef_ss",
        "full_name": "__m128 _mm_scalef_ss(__m128 a, __m128 b);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_scalef_ss",
        "full_name": "__m128 _mm_mask_scalef_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_scalef_ss",
        "full_name": "__m128 _mm_maskz_scalef_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Scale the lower single-precision (32-bit) floating-point element in \"a\" using the value from the lower element of \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_shuffle_pi16",
        "full_name": "__m64 _mm_shuffle_pi16(__m64 a, int imm8);",
        "description": "Shuffle 16-bit integers in \"a\" using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_shuffle_epi32",
        "full_name": "__m128i _mm_mask_shuffle_epi32(__m128i src, __mmask8 k, __m128i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shuffle_epi32",
        "full_name": "__m128i _mm_maskz_shuffle_epi32(__mmask8 k, __m128i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sin_pd",
        "full_name": "__m128d _mm_sin_pd(__m128d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sin_ps",
        "full_name": "__m128 _mm_sin_ps(__m128 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sincos_pd",
        "full_name": "__m128d _mm_sincos_pd(__m128d *mem_addr, __m128d a);",
        "description": "Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm_sincos_ps",
        "full_name": "__m128 _mm_sincos_ps(__m128 *mem_addr, __m128 a);",
        "description": "Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm_sinh_pd",
        "full_name": "__m128d _mm_sinh_pd(__m128d a);",
        "description": "Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sinh_ps",
        "full_name": "__m128 _mm_sinh_ps(__m128 a);",
        "description": "Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sqrt_pd",
        "full_name": "__m128d _mm_sqrt_pd(__m128d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_sqrt_pd",
        "full_name": "__m128d _mm_mask_sqrt_pd(__m128d src, __mmask8 k, __m128d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sqrt_pd",
        "full_name": "__m128d _mm_maskz_sqrt_pd(__mmask8 k, __m128d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sqrt_ps",
        "full_name": "__m128 _mm_sqrt_ps(__m128 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_sqrt_ps",
        "full_name": "__m128 _mm_mask_sqrt_ps(__m128 src, __mmask8 k, __m128 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_sqrt_ps",
        "full_name": "__m128 _mm_maskz_sqrt_ps(__mmask8 k, __m128 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sqrt_round_sd",
        "full_name": "__m128d _mm_sqrt_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_sqrt_round_sd",
        "full_name": "__m128d _mm_mask_sqrt_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_sqrt_round_sd",
        "full_name": "__m128d _mm_maskz_sqrt_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_sqrt_round_ss",
        "full_name": "__m128 _mm_sqrt_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_sqrt_round_ss",
        "full_name": "__m128 _mm_mask_sqrt_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_sqrt_round_ss",
        "full_name": "__m128 _mm_maskz_sqrt_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_sqrt_ss",
        "full_name": "__m128 _mm_sqrt_ss(__m128 a);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_sqrt_ss",
        "full_name": "__m128 _mm_mask_sqrt_ss(__m128 src, __mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_maskz_sqrt_ss",
        "full_name": "__m128 _mm_maskz_sqrt_ss(__mmask8 k, __m128 a, __m128 b);",
        "description": "Compute the square root of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_sqrt_sd",
        "full_name": "__m128d _mm_sqrt_sd(__m128d a, __m128d b);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_sqrt_sd",
        "full_name": "__m128d _mm_mask_sqrt_sd(__m128d src, __mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_maskz_sqrt_sd",
        "full_name": "__m128d _mm_maskz_sqrt_sd(__mmask8 k, __m128d a, __m128d b);",
        "description": "Compute the square root of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_mask_subs_epi16",
        "full_name": "__m128i _mm_mask_subs_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_subs_epi16",
        "full_name": "__m128i _mm_maskz_subs_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_subs_epi8",
        "full_name": "__m128i _mm_mask_subs_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_subs_epi8",
        "full_name": "__m128i _mm_maskz_subs_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_subs_epu16",
        "full_name": "__m128i _mm_subs_epu16(__m128i a, __m128i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_subs_epu16",
        "full_name": "__m128i _mm_mask_subs_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_subs_epu16",
        "full_name": "__m128i _mm_maskz_subs_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_subs_epu8",
        "full_name": "__m128i _mm_subs_epu8(__m128i a, __m128i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_subs_epu8",
        "full_name": "__m128i _mm_mask_subs_epu8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_subs_epu8",
        "full_name": "__m128i _mm_maskz_subs_epu8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_subs_pi8",
        "full_name": "__m64 _mm_subs_pi8(__m64 a, __m64 b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_subs_pi16",
        "full_name": "__m64 _mm_subs_pi16(__m64 a, __m64 b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_subs_pu8",
        "full_name": "__m64 _mm_subs_pu8(__m64 a, __m64 b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_subs_pu16",
        "full_name": "__m64 _mm_subs_pu16(__m64 a, __m64 b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_svml_ceil_pd",
        "full_name": "__m128d _mm_svml_ceil_pd(__m128d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm_svml_ceil_ps",
        "full_name": "__m128 _mm_svml_ceil_ps(__m128 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm_svml_floor_pd",
        "full_name": "__m128d _mm_svml_floor_pd(__m128d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm_svml_floor_ps",
        "full_name": "__m128 _mm_svml_floor_ps(__m128 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm_svml_round_pd",
        "full_name": "__m128d _mm_svml_round_pd(__m128d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm_svml_round_ps",
        "full_name": "__m128 _mm_svml_round_ps(__m128 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm_svml_sqrt_pd",
        "full_name": "__m128d _mm_svml_sqrt_pd(__m128d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". Note that this intrinsic is less efficient than \"_mm_sqrt_pd\"."
    },
    {
        "name": "_mm_svml_sqrt_ps",
        "full_name": "__m128 _mm_svml_sqrt_ps(__m128 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". Note that this intrinsic is less efficient than \"_mm_sqrt_ps\"."
    },
    {
        "name": "_mm_tan_pd",
        "full_name": "__m128d _mm_tan_pd(__m128d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_tan_ps",
        "full_name": "__m128 _mm_tan_ps(__m128 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_tanh_pd",
        "full_name": "__m128d _mm_tanh_pd(__m128d a);",
        "description": "Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_tanh_ps",
        "full_name": "__m128 _mm_tanh_ps(__m128 a);",
        "description": "Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm_unpackhi_epi16",
        "full_name": "__m128i _mm_unpackhi_epi16(__m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_unpackhi_epi16",
        "full_name": "__m128i _mm_mask_unpackhi_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_epi16",
        "full_name": "__m128i _mm_maskz_unpackhi_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_unpackhi_epi32",
        "full_name": "__m128i _mm_mask_unpackhi_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_unpackhi_epi32",
        "full_name": "__m128i _mm_maskz_unpackhi_epi32(__mmask8 k, __m128i a, __m128i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_range_round_sd",
        "full_name": "__m128d _mm_maskz_range_round_sd(__mmask8 k, __m128d a, __m128d b, int imm8, int sae);",
        "description": "Calculate the max, min, absolute max, or absolute min (depending on control in \"imm8\") for the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\timm8[1:0] specifies the operation control: 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.\n\timm8[3:2] specifies the sign control: 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. [sae_note]"
    },
    {
        "name": "_mm_add_round_sd",
        "full_name": "__m128d _mm_add_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_add_round_sd",
        "full_name": "__m128d _mm_mask_add_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_add_round_sd",
        "full_name": "__m128d _mm_maskz_add_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Add the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_add_round_ss",
        "full_name": "__m128 _mm_add_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mask_add_round_ss",
        "full_name": "__m128 _mm_mask_add_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_add_round_ss",
        "full_name": "__m128 _mm_maskz_add_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Add the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_fmadd_round_sd",
        "full_name": "__m128d _mm_fmadd_round_sd(__m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fmadd_round_sd",
        "full_name": "__m128d _mm_mask_fmadd_round_sd(__m128d a, __mmask8 k, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fmadd_round_sd",
        "full_name": "__m128d _mm_mask3_fmadd_round_sd(__m128d a, __m128d b, __m128d c, __mmask8 k, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fmadd_round_sd",
        "full_name": "__m128d _mm_maskz_fmadd_round_sd(__mmask8 k, __m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_fmadd_round_ss",
        "full_name": "__m128 _mm_fmadd_round_ss(__m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fmadd_round_ss",
        "full_name": "__m128 _mm_mask_fmadd_round_ss(__m128 a, __mmask8 k, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fmadd_round_ss",
        "full_name": "__m128 _mm_mask3_fmadd_round_ss(__m128 a, __m128 b, __m128 c, __mmask8 k, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fmadd_round_ss",
        "full_name": "__m128 _mm_maskz_fmadd_round_ss(__mmask8 k, __m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_fmsub_round_sd",
        "full_name": "__m128d _mm_fmsub_round_sd(__m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fmsub_round_sd",
        "full_name": "__m128d _mm_mask_fmsub_round_sd(__m128d a, __mmask8 k, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fmsub_round_sd",
        "full_name": "__m128d _mm_mask3_fmsub_round_sd(__m128d a, __m128d b, __m128d c, __mmask8 k, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fmsub_round_sd",
        "full_name": "__m128d _mm_maskz_fmsub_round_sd(__mmask8 k, __m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_fmsub_round_ss",
        "full_name": "__m128 _mm_fmsub_round_ss(__m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fmsub_round_ss",
        "full_name": "__m128 _mm_mask_fmsub_round_ss(__m128 a, __mmask8 k, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fmsub_round_ss",
        "full_name": "__m128 _mm_mask3_fmsub_round_ss(__m128 a, __m128 b, __m128 c, __mmask8 k, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fmsub_round_ss",
        "full_name": "__m128 _mm_maskz_fmsub_round_ss(__mmask8 k, __m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_fnmadd_round_sd",
        "full_name": "__m128d _mm_fnmadd_round_sd(__m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fnmadd_round_sd",
        "full_name": "__m128d _mm_mask_fnmadd_round_sd(__m128d a, __mmask8 k, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fnmadd_round_sd",
        "full_name": "__m128d _mm_mask3_fnmadd_round_sd(__m128d a, __m128d b, __m128d c, __mmask8 k, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fnmadd_round_sd",
        "full_name": "__m128d _mm_maskz_fnmadd_round_sd(__mmask8 k, __m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_fnmadd_round_ss",
        "full_name": "__m128 _mm_fnmadd_round_ss(__m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fnmadd_round_ss",
        "full_name": "__m128 _mm_mask_fnmadd_round_ss(__m128 a, __mmask8 k, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fnmadd_round_ss",
        "full_name": "__m128 _mm_mask3_fnmadd_round_ss(__m128 a, __m128 b, __m128 c, __mmask8 k, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fnmadd_round_ss",
        "full_name": "__m128 _mm_maskz_fnmadd_round_ss(__mmask8 k, __m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and add the negated intermediate result to the lower element in \"c\". Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm_fnmsub_round_sd",
        "full_name": "__m128d _mm_fnmsub_round_sd(__m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fnmsub_round_sd",
        "full_name": "__m128d _mm_mask_fnmsub_round_sd(__m128d a, __mmask8 k, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fnmsub_round_sd",
        "full_name": "__m128d _mm_mask3_fnmsub_round_sd(__m128d a, __m128d b, __m128d c, __mmask8 k, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper element from \"c\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fnmsub_round_sd",
        "full_name": "__m128d _mm_maskz_fnmsub_round_sd(__mmask8 k, __m128d a, __m128d b, __m128d c, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_fnmsub_round_ss",
        "full_name": "__m128 _mm_fnmsub_round_ss(__m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract the lower element in \"c\" from the negated intermediate result, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_fnmsub_round_ss",
        "full_name": "__m128 _mm_mask_fnmsub_round_ss(__m128 a, __mmask8 k, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask3_fnmsub_round_ss",
        "full_name": "__m128 _mm_mask3_fnmsub_round_ss(__m128 a, __m128 b, __m128 c, __mmask8 k, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"c\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"c\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_fnmsub_round_ss",
        "full_name": "__m128 _mm_maskz_fnmsub_round_ss(__mmask8 k, __m128 a, __m128 b, __m128 c, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\", and subtract the lower element in \"c\" from the negated intermediate result. Store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mul_round_sd",
        "full_name": "__m128d _mm_mul_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mask_mul_round_sd",
        "full_name": "__m128d _mm_mask_mul_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_mul_round_sd",
        "full_name": "__m128d _mm_maskz_mul_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Multiply the lower double-precision (64-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mul_round_ss",
        "full_name": "__m128 _mm_mul_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_mask_mul_round_ss",
        "full_name": "__m128 _mm_mask_mul_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_maskz_mul_round_ss",
        "full_name": "__m128 _mm_maskz_mul_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Multiply the lower single-precision (32-bit) floating-point element in \"a\" and \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t\t[round_note]"
    },
    {
        "name": "_mm_sub_round_sd",
        "full_name": "__m128d _mm_sub_round_sd(__m128d a, __m128d b, int rounding);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_sub_round_sd",
        "full_name": "__m128d _mm_mask_sub_round_sd(__m128d src, __mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_sub_round_sd",
        "full_name": "__m128d _mm_maskz_sub_round_sd(__mmask8 k, __m128d a, __m128d b, int rounding);",
        "description": "Subtract the lower double-precision (64-bit) floating-point element in \"b\" from the lower double-precision (64-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_sub_round_ss",
        "full_name": "__m128 _mm_sub_round_ss(__m128 a, __m128 b, int rounding);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_mask_sub_round_ss",
        "full_name": "__m128 _mm_mask_sub_round_ss(__m128 src, __mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"src\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_maskz_sub_round_ss",
        "full_name": "__m128 _mm_maskz_sub_round_ss(__mmask8 k, __m128 a, __m128 b, int rounding);",
        "description": "Subtract the lower single-precision (32-bit) floating-point element in \"b\" from the lower single-precision (32-bit) floating-point element in \"a\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm_trunc_pd",
        "full_name": "__m128d _mm_trunc_pd(__m128d a);",
        "description": "Truncate the packed double-precision (64-bit) floating-point elements in \"a\", and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm_trunc_ps",
        "full_name": "__m128 _mm_trunc_ps(__m128 a);",
        "description": "Truncate the packed single-precision (32-bit) floating-point elements in \"a\", and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm_udiv_epi32",
        "full_name": "__m128i _mm_udiv_epi32(__m128i a, __m128i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm_udivrem_epi32",
        "full_name": "__m128i _mm_udivrem_epi32(__m128i *mem_addr, __m128i a, __m128i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", store the truncated results in \"dst\", and store the remainders as packed unsigned 32-bit integers into memory at \"mem_addr\"."
    },
    {
        "name": "_mm256_shuffle_epi32",
        "full_name": "__m256i _mm256_shuffle_epi32(__m256i a, const int imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_epi32",
        "full_name": "__m256i _mm256_mask_shuffle_epi32(__m256i src, __mmask8 k, __m256i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_epi32",
        "full_name": "__m256i _mm256_maskz_shuffle_epi32(__mmask8 k, __m256i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_f32x4",
        "full_name": "__m256 _mm256_shuffle_f32x4(__m256 a, __m256 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_f32x4",
        "full_name": "__m256 _mm256_mask_shuffle_f32x4(__m256 src, __mmask8 k, __m256 a, __m256 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_f32x4",
        "full_name": "__m256 _mm256_maskz_shuffle_f32x4(__mmask8 k, __m256 a, __m256 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_f64x2",
        "full_name": "__m256d _mm256_shuffle_f64x2(__m256d a, __m256d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_f64x2",
        "full_name": "__m256d _mm256_mask_shuffle_f64x2(__m256d src, __mmask8 k, __m256d a, __m256d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_f64x2",
        "full_name": "__m256d _mm256_maskz_shuffle_f64x2(__mmask8 k, __m256d a, __m256d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_i32x4",
        "full_name": "__m256i _mm256_shuffle_i32x4(__m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_i32x4",
        "full_name": "__m256i _mm256_mask_shuffle_i32x4(__m256i src, __mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_i32x4",
        "full_name": "__m256i _mm256_maskz_shuffle_i32x4(__mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shuffle_i64x2",
        "full_name": "__m256i _mm256_shuffle_i64x2(__m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_shuffle_i64x2",
        "full_name": "__m256i _mm256_mask_shuffle_i64x2(__m256i src, __mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shuffle_i64x2",
        "full_name": "__m256i _mm256_maskz_shuffle_i64x2(__mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_sin_pd",
        "full_name": "__m256d _mm256_sin_pd(__m256d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sin_ps",
        "full_name": "__m256 _mm256_sin_ps(__m256 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sincos_pd",
        "full_name": "__m256d _mm256_sincos_pd(__m256d *mem_addr, __m256d a);",
        "description": "Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm256_sincos_ps",
        "full_name": "__m256 _mm256_sincos_ps(__m256 *mem_addr, __m256 a);",
        "description": "Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm256_sinh_pd",
        "full_name": "__m256d _mm256_sinh_pd(__m256d a);",
        "description": "Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sinh_ps",
        "full_name": "__m256 _mm256_sinh_ps(__m256 a);",
        "description": "Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sqrt_pd",
        "full_name": "__m256d _mm256_sqrt_pd(__m256d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_sqrt_pd",
        "full_name": "__m256d _mm256_mask_sqrt_pd(__m256d src, __mmask8 k, __m256d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sqrt_pd",
        "full_name": "__m256d _mm256_maskz_sqrt_pd(__mmask8 k, __m256d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_sqrt_ps",
        "full_name": "__m256 _mm256_sqrt_ps(__m256 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_sqrt_ps",
        "full_name": "__m256 _mm256_mask_sqrt_ps(__m256 src, __mmask8 k, __m256 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_sqrt_ps",
        "full_name": "__m256 _mm256_maskz_sqrt_ps(__mmask8 k, __m256 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_subs_epi16",
        "full_name": "__m256i _mm256_mask_subs_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_subs_epi16",
        "full_name": "__m256i _mm256_maskz_subs_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_subs_epi8",
        "full_name": "__m256i _mm256_mask_subs_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_subs_epi8",
        "full_name": "__m256i _mm256_maskz_subs_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_subs_epu16",
        "full_name": "__m256i _mm256_mask_subs_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_subs_epu16",
        "full_name": "__m256i _mm256_maskz_subs_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_subs_epu8",
        "full_name": "__m256i _mm256_mask_subs_epu8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_subs_epu8",
        "full_name": "__m256i _mm256_maskz_subs_epu8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_svml_ceil_pd",
        "full_name": "__m256d _mm256_svml_ceil_pd(__m256d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm256_svml_ceil_ps",
        "full_name": "__m256 _mm256_svml_ceil_ps(__m256 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" up to an integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm256_svml_floor_pd",
        "full_name": "__m256d _mm256_svml_floor_pd(__m256d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm256_svml_floor_ps",
        "full_name": "__m256 _mm256_svml_floor_ps(__m256 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" down to an integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm256_svml_round_pd",
        "full_name": "__m256d _mm256_svml_round_pd(__m256d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm256_svml_round_ps",
        "full_name": "__m256 _mm256_svml_round_ps(__m256 a);",
        "description": "Round the packed single-precision (32-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm256_svml_sqrt_pd",
        "full_name": "__m256d _mm256_svml_sqrt_pd(__m256d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". Note that this intrinsic is less efficient than \"_mm256_sqrt_pd\"."
    },
    {
        "name": "_mm256_svml_sqrt_ps",
        "full_name": "__m256 _mm256_svml_sqrt_ps(__m256 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". Note that this intrinsic is less efficient than \"_mm256_sqrt_ps\"."
    },
    {
        "name": "_mm256_tan_pd",
        "full_name": "__m256d _mm256_tan_pd(__m256d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_tan_ps",
        "full_name": "__m256 _mm256_tan_ps(__m256 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_tanh_pd",
        "full_name": "__m256d _mm256_tanh_pd(__m256d a);",
        "description": "Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_tanh_ps",
        "full_name": "__m256 _mm256_tanh_ps(__m256 a);",
        "description": "Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_unpackhi_epi16",
        "full_name": "__m256i _mm256_unpackhi_epi16(__m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_unpackhi_epi16",
        "full_name": "__m256i _mm256_mask_unpackhi_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_epi16",
        "full_name": "__m256i _mm256_maskz_unpackhi_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_unpackhi_epi32",
        "full_name": "__m256i _mm256_mask_unpackhi_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_unpackhi_epi32",
        "full_name": "__m256i _mm256_maskz_unpackhi_epi32(__mmask8 k, __m256i a, __m256i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_trunc_pd",
        "full_name": "__m256d _mm256_trunc_pd(__m256d a);",
        "description": "Truncate the packed double-precision (64-bit) floating-point elements in \"a\", and store the results as packed double-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundpd\"/\"vroundpd\" instruction."
    },
    {
        "name": "_mm256_trunc_ps",
        "full_name": "__m256 _mm256_trunc_ps(__m256 a);",
        "description": "Truncate the packed single-precision (32-bit) floating-point elements in \"a\", and store the results as packed single-precision floating-point elements in \"dst\". This intrinsic may generate the \"roundps\"/\"vroundps\" instruction."
    },
    {
        "name": "_mm256_udiv_epi32",
        "full_name": "__m256i _mm256_udiv_epi32(__m256i a, __m256i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", and store the truncated results in \"dst\"."
    },
    {
        "name": "_mm256_udivrem_epi32",
        "full_name": "__m256i _mm256_udivrem_epi32(__m256i *mem_addr, __m256i a, __m256i b);",
        "description": "Divide packed unsigned 32-bit integers in \"a\" by packed elements in \"b\", store the truncated results in \"dst\", and store the remainders as packed unsigned 32-bit integers into memory at \"mem_addr\"."
    },
    {
        "name": "_mm512_shuffle_epi32",
        "full_name": "__m512i _mm512_shuffle_epi32(__m512i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_epi32",
        "full_name": "__m512i _mm512_mask_shuffle_epi32(__m512i src, __mmask16 k, __m512i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_epi32",
        "full_name": "__m512i _mm512_maskz_shuffle_epi32(__mmask16 k, __m512i a, _MM_PERM_ENUM imm8);",
        "description": "Shuffle 32-bit integers in \"a\" within 128-bit lanes using the control in \"imm8\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_f32x4",
        "full_name": "__m512 _mm512_shuffle_f32x4(__m512 a, __m512 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_f32x4",
        "full_name": "__m512 _mm512_mask_shuffle_f32x4(__m512 src, __mmask16 k, __m512 a, __m512 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_f32x4",
        "full_name": "__m512 _mm512_maskz_shuffle_f32x4(__mmask16 k, __m512 a, __m512 b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_f64x2",
        "full_name": "__m512d _mm512_shuffle_f64x2(__m512d a, __m512d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_f64x2",
        "full_name": "__m512d _mm512_mask_shuffle_f64x2(__m512d src, __mmask8 k, __m512d a, __m512d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_f64x2",
        "full_name": "__m512d _mm512_maskz_shuffle_f64x2(__mmask8 k, __m512d a, __m512d b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_i32x4",
        "full_name": "__m512i _mm512_shuffle_i32x4(__m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_i32x4",
        "full_name": "__m512i _mm512_mask_shuffle_i32x4(__m512i src, __mmask16 k, __m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_i32x4",
        "full_name": "__m512i _mm512_maskz_shuffle_i32x4(__mmask16 k, __m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 4 32-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shuffle_i64x2",
        "full_name": "__m512i _mm512_shuffle_i64x2(__m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_shuffle_i64x2",
        "full_name": "__m512i _mm512_mask_shuffle_i64x2(__m512i src, __mmask8 k, __m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shuffle_i64x2",
        "full_name": "__m512i _mm512_maskz_shuffle_i64x2(__mmask8 k, __m512i a, __m512i b, const int imm8);",
        "description": "Shuffle 128-bits (composed of 2 64-bit integers) selected by \"imm8\" from \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sin_pd",
        "full_name": "__m512d _mm512_sin_pd(__m512d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sin_pd",
        "full_name": "__m512d _mm512_mask_sin_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sin_ps",
        "full_name": "__m512 _mm512_sin_ps(__m512 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sin_ps",
        "full_name": "__m512 _mm512_mask_sin_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sincos_pd",
        "full_name": "__m512d _mm512_sincos_pd(__m512d *mem_addr, __m512d a);",
        "description": "Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm512_mask_sincos_pd",
        "full_name": "__m512d _mm512_mask_sincos_pd(__m512d *mem_addr, __m512d src1, __m512d src2, __mmask8 k, __m512d a);",
        "description": "Compute the sine and cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\". Elements are written to their respective locations using writemask \"k\" (sine elements are copied from \"src1\" and cosine elements are copied from \"src2\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sincos_ps",
        "full_name": "__m512 _mm512_sincos_ps(__m512 *mem_addr, __m512 a);",
        "description": "Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\"."
    },
    {
        "name": "_mm512_mask_sincos_ps",
        "full_name": "__m512 _mm512_mask_sincos_ps(__m512 *mem_addr, __m512 src1, __m512 src2, __mmask16 k, __m512 a);",
        "description": "Compute the sine and cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, store the sine in \"dst\", and store the cosine into memory at \"mem_addr\". Elements are written to their respective locations using writemask \"k\" (sine elements are copied from \"src1\" and cosine elements are copied from \"src2\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sinh_pd",
        "full_name": "__m512d _mm512_sinh_pd(__m512d a);",
        "description": "Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sinh_pd",
        "full_name": "__m512d _mm512_mask_sinh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the hyperbolic sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sinh_ps",
        "full_name": "__m512 _mm512_sinh_ps(__m512 a);",
        "description": "Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sinh_ps",
        "full_name": "__m512 _mm512_mask_sinh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the hyperbolic sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sqrt_pd",
        "full_name": "__m512d _mm512_sqrt_pd(__m512d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sqrt_pd",
        "full_name": "__m512d _mm512_mask_sqrt_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sqrt_pd",
        "full_name": "__m512d _mm512_maskz_sqrt_pd(__mmask8 k, __m512d a);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sqrt_ps",
        "full_name": "__m512 _mm512_sqrt_ps(__m512 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sqrt_ps",
        "full_name": "__m512 _mm512_mask_sqrt_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_sqrt_ps",
        "full_name": "__m512 _mm512_maskz_sqrt_ps(__mmask16 k, __m512 a);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sqrt_round_pd",
        "full_name": "__m512d _mm512_sqrt_round_pd(__m512d a, int rounding);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_sqrt_round_pd",
        "full_name": "__m512d _mm512_mask_sqrt_round_pd(__m512d src, __mmask8 k, __m512d a, int rounding);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_sqrt_round_pd",
        "full_name": "__m512d _mm512_maskz_sqrt_round_pd(__mmask8 k, __m512d a, int rounding);",
        "description": "Compute the square root of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_sqrt_round_ps",
        "full_name": "__m512 _mm512_sqrt_round_ps(__m512 a, int rounding);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_sqrt_round_ps",
        "full_name": "__m512 _mm512_mask_sqrt_round_ps(__m512 src, __mmask16 k, __m512 a, int rounding);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_sqrt_round_ps",
        "full_name": "__m512 _mm512_maskz_sqrt_round_ps(__mmask16 k, __m512 a, int rounding);",
        "description": "Compute the square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_subs_epi16",
        "full_name": "__m512i _mm512_mask_subs_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed signed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_subs_epi16",
        "full_name": "__m512i _mm512_maskz_subs_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed signed 16-bit integers in \"b\" from packed signed 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_subs_epi8",
        "full_name": "__m512i _mm512_mask_subs_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed signed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_subs_epi8",
        "full_name": "__m512i _mm512_maskz_subs_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed signed 8-bit integers in \"b\" from packed signed 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_subs_epu16",
        "full_name": "__m512i _mm512_mask_subs_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_subs_epu16",
        "full_name": "__m512i _mm512_maskz_subs_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Subtract packed unsigned 16-bit integers in \"b\" from packed unsigned 16-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_subs_epu8",
        "full_name": "__m512i _mm512_mask_subs_epu8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_subs_epu8",
        "full_name": "__m512i _mm512_maskz_subs_epu8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Subtract packed unsigned 8-bit integers in \"b\" from packed unsigned 8-bit integers in \"a\" using saturation, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_svml_round_pd",
        "full_name": "__m512d _mm512_svml_round_pd(__m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_svml_round_pd",
        "full_name": "__m512d _mm512_mask_svml_round_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Round the packed double-precision (64-bit) floating-point elements in \"a\" to the nearest integer value, and store the results as packed double-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_tan_pd",
        "full_name": "__m512d _mm512_tan_pd(__m512d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tan_pd",
        "full_name": "__m512d _mm512_mask_tan_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_tan_ps",
        "full_name": "__m512 _mm512_tan_ps(__m512 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tan_ps",
        "full_name": "__m512 _mm512_mask_tan_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_tanh_pd",
        "full_name": "__m512d _mm512_tanh_pd(__m512d a);",
        "description": "Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tanh_pd",
        "full_name": "__m512d _mm512_mask_tanh_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the hyperbolic tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_tanh_ps",
        "full_name": "__m512 _mm512_tanh_ps(__m512 a);",
        "description": "Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tanh_ps",
        "full_name": "__m512 _mm512_mask_tanh_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the hyperbolic tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in radians, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_unpackhi_epi16",
        "full_name": "__m512i _mm512_unpackhi_epi16(__m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_unpackhi_epi16",
        "full_name": "__m512i _mm512_mask_unpackhi_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_epi16",
        "full_name": "__m512i _mm512_maskz_unpackhi_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 16-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_unpackhi_epi32",
        "full_name": "__m512i _mm512_mask_unpackhi_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_unpackhi_epi32",
        "full_name": "__m512i _mm512_maskz_unpackhi_epi32(__mmask16 k, __m512i a, __m512i b);",
        "description": "Unpack and interleave 32-bit integers from the high half of each 128-bit lane in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_add_round_pd",
        "full_name": "__m512d _mm512_mask_add_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_add_round_pd",
        "full_name": "__m512d _mm512_maskz_add_round_pd(__mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Add packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_add_round_ps",
        "full_name": "__m512 _mm512_mask_add_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_add_round_ps",
        "full_name": "__m512 _mm512_maskz_add_round_ps(__mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Add packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_fmadd_round_pd",
        "full_name": "__m512d _mm512_fmadd_round_pd(__m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmadd_round_pd",
        "full_name": "__m512d _mm512_mask_fmadd_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask3_fmadd_round_pd",
        "full_name": "__m512d _mm512_mask3_fmadd_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_fmadd_round_pd",
        "full_name": "__m512d _mm512_maskz_fmadd_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmadd_round_ps",
        "full_name": "__m512 _mm512_fmadd_round_ps(__m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmadd_round_ps",
        "full_name": "__m512 _mm512_mask_fmadd_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_mask3_fmadd_round_ps",
        "full_name": "__m512 _mm512_mask3_fmadd_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_fmadd_round_ps",
        "full_name": "__m512 _mm512_maskz_fmadd_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmsub_round_pd",
        "full_name": "__m512d _mm512_fmsub_round_pd(__m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmsub_round_pd",
        "full_name": "__m512d _mm512_mask_fmsub_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fmsub_round_pd",
        "full_name": "__m512d _mm512_mask3_fmsub_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmsub_round_pd",
        "full_name": "__m512d _mm512_maskz_fmsub_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmsub_round_ps",
        "full_name": "__m512 _mm512_fmsub_round_ps(__m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmsub_round_ps",
        "full_name": "__m512 _mm512_mask_fmsub_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fmsub_round_ps",
        "full_name": "__m512 _mm512_mask3_fmsub_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmsub_round_ps",
        "full_name": "__m512 _mm512_maskz_fmsub_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmsubadd_round_pd",
        "full_name": "__m512d _mm512_fmsubadd_round_pd(__m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmsubadd_round_pd",
        "full_name": "__m512d _mm512_mask_fmsubadd_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fmsubadd_round_pd",
        "full_name": "__m512d _mm512_mask3_fmsubadd_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmsubadd_round_pd",
        "full_name": "__m512d _mm512_maskz_fmsubadd_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fmsubadd_round_ps",
        "full_name": "__m512 _mm512_fmsubadd_round_ps(__m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fmsubadd_round_ps",
        "full_name": "__m512 _mm512_mask_fmsubadd_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask3_fmsubadd_round_ps",
        "full_name": "__m512 _mm512_mask3_fmsubadd_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fmsubadd_round_ps",
        "full_name": "__m512 _mm512_maskz_fmsubadd_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", alternately subtract and add packed elements in \"c\" from/to the intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_fnmadd_round_pd",
        "full_name": "__m512d _mm512_fnmadd_round_pd(__m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\".\n\t [round_note]"
    },
    {
        "name": "_mm512_mask_fnmadd_round_pd",
        "full_name": "__m512d _mm512_mask_fnmadd_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fnmadd_round_pd",
        "full_name": "__m512d _mm512_mask3_fnmadd_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fnmadd_round_pd",
        "full_name": "__m512d _mm512_maskz_fnmadd_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_fnmadd_round_ps",
        "full_name": "__m512 _mm512_fnmadd_round_ps(__m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\".  \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fnmadd_round_ps",
        "full_name": "__m512 _mm512_mask_fnmadd_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_mask3_fnmadd_round_ps",
        "full_name": "__m512 _mm512_mask3_fnmadd_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fnmadd_round_ps",
        "full_name": "__m512 _mm512_maskz_fnmadd_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", add the negated intermediate result to packed elements in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_fnmsub_round_pd",
        "full_name": "__m512d _mm512_fnmsub_round_pd(__m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\".  \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fnmsub_round_pd",
        "full_name": "__m512d _mm512_mask_fnmsub_round_pd(__m512d a, __mmask8 k, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask3_fnmsub_round_pd",
        "full_name": "__m512d _mm512_mask3_fnmsub_round_pd(__m512d a, __m512d b, __m512d c, __mmask8 k, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_maskz_fnmsub_round_pd",
        "full_name": "__m512d _mm512_maskz_fnmsub_round_pd(__mmask8 k, __m512d a, __m512d b, __m512d c, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). [round_note]"
    },
    {
        "name": "_mm512_fnmsub_round_ps",
        "full_name": "__m512 _mm512_fnmsub_round_ps(__m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\". \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_fnmsub_round_ps",
        "full_name": "__m512 _mm512_mask_fnmsub_round_ps(__m512 a, __mmask16 k, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_mask3_fnmsub_round_ps",
        "full_name": "__m512 _mm512_mask3_fnmsub_round_ps(__m512 a, __m512 b, __m512 c, __mmask16 k, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"c\" when the corresponding mask bit is not set).  [round_note]"
    },
    {
        "name": "_mm512_maskz_fnmsub_round_ps",
        "full_name": "__m512 _mm512_maskz_fnmsub_round_ps(__mmask16 k, __m512 a, __m512 b, __m512 c, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", subtract packed elements in \"c\" from the negated intermediate result, and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_mul_round_pd",
        "full_name": "__m512d _mm512_mask_mul_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).  \n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_mul_round_pd",
        "full_name": "__m512d _mm512_maskz_mul_round_pd(__mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Multiply packed double-precision (64-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_mul_round_ps",
        "full_name": "__m512 _mm512_mask_mul_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t [round_note]"
    },
    {
        "name": "_mm512_maskz_mul_round_ps",
        "full_name": "__m512 _mm512_maskz_mul_round_ps(__mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Multiply packed single-precision (32-bit) floating-point elements in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \n\t[round_note]"
    },
    {
        "name": "_mm512_mask_sub_round_pd",
        "full_name": "__m512d _mm512_mask_sub_round_pd(__m512d src, __mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_sub_round_pd",
        "full_name": "__m512d _mm512_maskz_sub_round_pd(__mmask8 k, __m512d a, __m512d b, int rounding);",
        "description": "Subtract packed double-precision (64-bit) floating-point elements in \"b\" from packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_mask_sub_round_ps",
        "full_name": "__m512 _mm512_mask_sub_round_ps(__m512 src, __mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_maskz_sub_round_ps",
        "full_name": "__m512 _mm512_maskz_sub_round_ps(__mmask16 k, __m512 a, __m512 b, int rounding);",
        "description": "Subtract packed single-precision (32-bit) floating-point elements in \"b\" from packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[round_note]"
    },
    {
        "name": "_mm512_trunc_pd",
        "full_name": "__m512d _mm512_trunc_pd(__m512d a);",
        "description": "Truncate the packed double-precision (64-bit) floating-point elements in \"a\", and store the results as packed double-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_trunc_pd",
        "full_name": "__m512d _mm512_mask_trunc_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Truncate the packed double-precision (64-bit) floating-point elements in \"a\", and store the results as packed double-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_trunc_ps",
        "full_name": "__m512 _mm512_trunc_ps(__m512 a);",
        "description": "Truncate the packed single-precision (32-bit) floating-point elements in \"a\", and store the results as packed single-precision floating-point elements in \"dst\"."
    },
    {
        "name": "_mm512_mask_trunc_ps",
        "full_name": "__m512 _mm512_mask_trunc_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Truncate the packed single-precision (32-bit) floating-point elements in \"a\", and store the results as packed single-precision floating-point elements in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fpclass_pd_mask",
        "full_name": "__mmask8 _mm_fpclass_pd_mask(__m128d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm_mask_fpclass_pd_mask",
        "full_name": "__mmask8 _mm_mask_fpclass_pd_mask(__mmask8 k1, __m128d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm256_fpclass_pd_mask",
        "full_name": "__mmask8 _mm256_fpclass_pd_mask(__m256d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm256_mask_fpclass_pd_mask",
        "full_name": "__mmask8 _mm256_mask_fpclass_pd_mask(__mmask8 k1, __m256d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm512_fpclass_pd_mask",
        "full_name": "__mmask8 _mm512_fpclass_pd_mask(__m512d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm512_mask_fpclass_pd_mask",
        "full_name": "__mmask8 _mm512_mask_fpclass_pd_mask(__mmask8 k1, __m512d a, const int imm8);",
        "description": "Test packed double-precision (64-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm_fpclass_ps_mask",
        "full_name": "__mmask8 _mm_fpclass_ps_mask(__m128 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm_mask_fpclass_ps_mask",
        "full_name": "__mmask8 _mm_mask_fpclass_ps_mask(__mmask8 k1, __m128 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm256_fpclass_ps_mask",
        "full_name": "__mmask8 _mm256_fpclass_ps_mask(__m256 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm256_mask_fpclass_ps_mask",
        "full_name": "__mmask8 _mm256_mask_fpclass_ps_mask(__mmask8 k1, __m256 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm512_fpclass_ps_mask",
        "full_name": "__mmask16 _mm512_fpclass_ps_mask(__m512 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm512_mask_fpclass_ps_mask",
        "full_name": "__mmask16 _mm512_mask_fpclass_ps_mask(__mmask16 k1, __m512 a, int imm8);",
        "description": "Test packed single-precision (32-bit) floating-point elements in \"a\" for special categories specified by \"imm8\", and store the results in mask vector \"k\" using zeromask \"k1\" (elements are zeroed out when the corresponding mask bit is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm_fpclass_sd_mask",
        "full_name": "__mmask8 _mm_fpclass_sd_mask(__m128d a, int imm8);",
        "description": "Test the lower double-precision (64-bit) floating-point element in \"a\" for special categories specified by \"imm8\", and store the result in mask vector \"k\".\n\t[fpclass_note]"
    },
    {
        "name": "_mm_mask_fpclass_sd_mask",
        "full_name": "__mmask8 _mm_mask_fpclass_sd_mask(__mmask8 k1, __m128d a, int imm8);",
        "description": "Test the lower double-precision (64-bit) floating-point element in \"a\" for special categories specified by \"imm8\", and store the result in mask vector \"k\" using zeromask \"k1\" (the element is zeroed out when mask bit 0 is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm_fpclass_ss_mask",
        "full_name": "__mmask8 _mm_fpclass_ss_mask(__m128 a, int imm8);",
        "description": "Test the lower single-precision (32-bit) floating-point element in \"a\" for special categories specified by \"imm8\", and store the result in mask vector \"k.\n\t[fpclass_note]"
    },
    {
        "name": "_mm_mask_fpclass_ss_mask",
        "full_name": "__mmask8 _mm_mask_fpclass_ss_mask(__mmask8 k1, __m128 a, int imm8);",
        "description": "Test the lower single-precision (32-bit) floating-point element in \"a\" for special categories specified by \"imm8\", and store the result in mask vector \"k\" using zeromask \"k1\" (the element is zeroed out when mask bit 0 is not set).\n\t[fpclass_note]"
    },
    {
        "name": "_mm256_mask_store_ps",
        "full_name": "void _mm256_mask_store_ps(void *mem_addr, __mmask8 k, __m256 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_mask_store_ps",
        "full_name": "void _mm512_mask_store_ps(void *mem_addr, __mmask16 k, __m512 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_store_sd",
        "full_name": "void _mm_mask_store_sd(double *mem_addr, __mmask8 k, __m128d a);",
        "description": "Store the lower double-precision (64-bit) floating-point element from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm_mask_store_ss",
        "full_name": "void _mm_mask_store_ss(float *mem_addr, __mmask8 k, __m128 a);",
        "description": "Store the lower single-precision (32-bit) floating-point element from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" must be aligned on a 16-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_storebe_i16",
        "full_name": "void _storebe_i16(void *ptr, short data);",
        "description": "Perform a bit swap operation of the 16 bits in \"data\", and store the results to memory."
    },
    {
        "name": "_storebe_i32",
        "full_name": "void _storebe_i32(void *ptr, int data);",
        "description": "Perform a bit swap operation of the 32 bits in \"data\", and store the results to memory."
    },
    {
        "name": "_storebe_i64",
        "full_name": "void _storebe_i64(void *ptr, __int64 data);",
        "description": "Perform a bit swap operation of the 64 bits in \"data\", and store the results to memory."
    },
    {
        "name": "_mm_mask_storeu_epi16",
        "full_name": "void _mm_mask_storeu_epi16(void *mem_addr, __mmask8 k, __m128i a);",
        "description": "Store packed 16-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_epi16",
        "full_name": "void _mm_storeu_epi16(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 8 packed 16-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_epi16",
        "full_name": "void _mm256_mask_storeu_epi16(void *mem_addr, __mmask16 k, __m256i a);",
        "description": "Store packed 16-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_epi16",
        "full_name": "void _mm256_storeu_epi16(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 16 packed 16-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_epi16",
        "full_name": "void _mm512_mask_storeu_epi16(void *mem_addr, __mmask32 k, __m512i a);",
        "description": "Store packed 16-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_epi16",
        "full_name": "void _mm512_storeu_epi16(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 32 packed 16-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_storeu_epi32",
        "full_name": "void _mm_mask_storeu_epi32(void *mem_addr, __mmask8 k, __m128i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_epi32",
        "full_name": "void _mm256_mask_storeu_epi32(void *mem_addr, __mmask8 k, __m256i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_epi32",
        "full_name": "void _mm256_storeu_epi32(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 8 packed 32-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_epi32",
        "full_name": "void _mm512_mask_storeu_epi32(void *mem_addr, __mmask16 k, __m512i a);",
        "description": "Store packed 32-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_epi32",
        "full_name": "void _mm512_storeu_epi32(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 16 packed 32-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_storeu_epi64",
        "full_name": "void _mm_mask_storeu_epi64(void *mem_addr, __mmask8 k, __m128i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_epi64",
        "full_name": "void _mm_storeu_epi64(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 2 packed 64-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_epi64",
        "full_name": "void _mm256_mask_storeu_epi64(void *mem_addr, __mmask8 k, __m256i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_epi64",
        "full_name": "void _mm256_storeu_epi64(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 4 packed 64-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_epi64",
        "full_name": "void _mm512_mask_storeu_epi64(void *mem_addr, __mmask8 k, __m512i a);",
        "description": "Store packed 64-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_epi64",
        "full_name": "void _mm512_storeu_epi64(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 8 packed 64-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_storeu_epi8",
        "full_name": "void _mm_mask_storeu_epi8(void *mem_addr, __mmask16 k, __m128i a);",
        "description": "Store packed 8-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_epi8",
        "full_name": "void _mm_storeu_epi8(void *mem_addr, __m128i a);",
        "description": "Store 128-bits (composed of 16 packed 8-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_epi8",
        "full_name": "void _mm256_mask_storeu_epi8(void *mem_addr, __mmask32 k, __m256i a);",
        "description": "Store packed 8-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_epi8",
        "full_name": "void _mm256_storeu_epi8(void *mem_addr, __m256i a);",
        "description": "Store 256-bits (composed of 32 packed 8-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_epi8",
        "full_name": "void _mm512_mask_storeu_epi8(void *mem_addr, __mmask64 k, __m512i a);",
        "description": "Store packed 8-bit integers from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_epi8",
        "full_name": "void _mm512_storeu_epi8(void *mem_addr, __m512i a);",
        "description": "Store 512-bits (composed of 64 packed 8-bit integers) from \"a\" into memory.\n\t\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_storeu_pd",
        "full_name": "void _mm_mask_storeu_pd(void *mem_addr, __mmask8 k, __m128d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_pd",
        "full_name": "void _mm256_mask_storeu_pd(void *mem_addr, __mmask8 k, __m256d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_pd",
        "full_name": "void _mm256_storeu_pd(double *mem_addr, __m256d a);",
        "description": "Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_pd",
        "full_name": "void _mm512_mask_storeu_pd(void *mem_addr, __mmask8 k, __m512d a);",
        "description": "Store packed double-precision (64-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_pd",
        "full_name": "void _mm512_storeu_pd(void *mem_addr, __m512d a);",
        "description": "Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from \"a\" into memory. \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_mask_storeu_ps",
        "full_name": "void _mm_mask_storeu_ps(void *mem_addr, __mmask8 k, __m128 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_mask_storeu_ps",
        "full_name": "void _mm256_mask_storeu_ps(void *mem_addr, __mmask8 k, __m256 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu_ps",
        "full_name": "void _mm256_storeu_ps(float *mem_addr, __m256 a);",
        "description": "Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\" into memory.\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_mask_storeu_ps",
        "full_name": "void _mm512_mask_storeu_ps(void *mem_addr, __mmask16 k, __m512 a);",
        "description": "Store packed single-precision (32-bit) floating-point elements from \"a\" into memory using writemask \"k\".\n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm512_storeu_ps",
        "full_name": "void _mm512_storeu_ps(void *mem_addr, __m512 a);",
        "description": "Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from \"a\" into memory. \n\t\"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_si16",
        "full_name": "void _mm_storeu_si16(void *mem_addr, __m128i a);",
        "description": "Store 16-bit integer from the first element of \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_si32",
        "full_name": "void _mm_storeu_si32(void *mem_addr, __m128i a);",
        "description": "Store 32-bit integer from the first element of \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm_storeu_si64",
        "full_name": "void _mm_storeu_si64(void *mem_addr, __m128i a);",
        "description": "Store 64-bit integer from the first element of \"a\" into memory. \"mem_addr\" does not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu2_m128",
        "full_name": "void _mm256_storeu2_m128(float *hiaddr, float *loaddr, __m256 a);",
        "description": "Store the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from \"a\" into memory two different 128-bit locations.\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu2_m128d",
        "full_name": "void _mm256_storeu2_m128d(double *hiaddr, double *loaddr, __m256d a);",
        "description": "Store the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from \"a\" into memory two different 128-bit locations.\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_storeu2_m128i",
        "full_name": "void _mm256_storeu2_m128i(__m128i *hiaddr, __m128i *loaddr, __m256i a);",
        "description": "Store the high and low 128-bit halves (each composed of integer data) from \"a\" into memory two different 128-bit locations.\n\t\"hiaddr\" and \"loaddr\" do not need to be aligned on any particular boundary."
    },
    {
        "name": "_mm256_stream_load_si256",
        "full_name": "__m256i _mm256_stream_load_si256(__m256i const * mem_addr);",
        "description": "Load 256-bits of integer data from memory into \"dst\" using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_stream_load_si512",
        "full_name": "__m512i _mm512_stream_load_si512(void const * mem_addr);",
        "description": "Load 512-bits of integer data from memory into \"dst\" using a non-temporal memory hint. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_stream_pd",
        "full_name": "void _mm256_stream_pd(double *mem_addr, __m256d a);",
        "description": "Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_stream_pd",
        "full_name": "void _mm512_stream_pd(void *mem_addr, __m512d a);",
        "description": "Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm256_stream_ps",
        "full_name": "void _mm256_stream_ps(float *mem_addr, __m256 a);",
        "description": "Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint.\n\t\"mem_addr\" must be aligned on a 32-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_mm512_stream_ps",
        "full_name": "void _mm512_stream_ps(void *mem_addr, __m512 a);",
        "description": "Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from \"a\" into memory using a non-temporal memory hint. \n\t\"mem_addr\" must be aligned on a 64-byte boundary or a general-protection exception may be generated."
    },
    {
        "name": "_subborrow_u32",
        "full_name": "unsigned char _subborrow_u32(unsigned char c_in, unsigned int a, unsigned int b, unsigned int *out);",
        "description": "Add unsigned 8-bit borrow \"c_in\" (carry flag) to unsigned 32-bit integer \"b\", and subtract the result from unsigned 32-bit integer \"a\". Store the unsigned 32-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_subborrow_u64",
        "full_name": "unsigned char _subborrow_u64(unsigned char c_in, unsigned __int64 a, unsigned __int64 b, unsigned __int64 *out);",
        "description": "Add unsigned 8-bit borrow \"c_in\" (carry flag) to unsigned 64-bit integer \"b\", and subtract the result from unsigned 64-bit integer \"a\". Store the unsigned 64-bit result in \"out\", and the carry-out in \"dst\" (carry or overflow flag)."
    },
    {
        "name": "_mm_mask_alignr_epi32",
        "full_name": "__m128i _mm_mask_alignr_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 16 bytes (4 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_alignr_epi32",
        "full_name": "__m128i _mm_maskz_alignr_epi32(__mmask8 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 16 bytes (4 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_alignr_epi32",
        "full_name": "__m256i _mm256_alignr_epi32(__m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 32 bytes (8 elements) in \"dst\"."
    },
    {
        "name": "_mm256_mask_alignr_epi32",
        "full_name": "__m256i _mm256_mask_alignr_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 32 bytes (8 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_alignr_epi32",
        "full_name": "__m256i _mm256_maskz_alignr_epi32(__mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 32 bytes (8 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_alignr_epi32",
        "full_name": "__m512i _mm512_alignr_epi32(__m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 64 bytes (16 elements) in \"dst\"."
    },
    {
        "name": "_mm512_mask_alignr_epi32",
        "full_name": "__m512i _mm512_mask_alignr_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and store the low 64 bytes (16 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_alignr_epi32",
        "full_name": "__m512i _mm512_maskz_alignr_epi32(__mmask16 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte immediate result, shift the result right by \"imm8\" 32-bit elements, and stores the low 64 bytes (16 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_alignr_epi64",
        "full_name": "__m128i _mm_mask_alignr_epi64(__m128i src, __mmask8 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte immediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 16 bytes (2 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_alignr_epi64",
        "full_name": "__m128i _mm_maskz_alignr_epi64(__mmask8 k, __m128i a, __m128i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 32-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 16 bytes (2 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_alignr_epi64",
        "full_name": "__m256i _mm256_alignr_epi64(__m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 32 bytes (4 elements) in \"dst\"."
    },
    {
        "name": "_mm256_mask_alignr_epi64",
        "full_name": "__m256i _mm256_mask_alignr_epi64(__m256i src, __mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 32 bytes (4 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_alignr_epi64",
        "full_name": "__m256i _mm256_maskz_alignr_epi64(__mmask8 k, __m256i a, __m256i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 64-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 32 bytes (4 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_alignr_epi64",
        "full_name": "__m512i _mm512_alignr_epi64(__m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 64 bytes (8 elements) in \"dst\"."
    },
    {
        "name": "_mm512_mask_alignr_epi64",
        "full_name": "__m512i _mm512_mask_alignr_epi64(__m512i src, __mmask8 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 64 bytes (8 elements) in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_alignr_epi64",
        "full_name": "__m512i _mm512_maskz_alignr_epi64(__mmask8 k, __m512i a, __m512i b, const int imm8);",
        "description": "Concatenate \"a\" and \"b\" into a 128-byte intermediate result, shift the result right by \"imm8\" 64-bit elements, and store the low 64 bytes (8 elements) in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_avg_epu16",
        "full_name": "__m128i _mm_mask_avg_epu16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_avg_epu16",
        "full_name": "__m128i _mm_maskz_avg_epu16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_avg_epu16",
        "full_name": "__m256i _mm256_avg_epu16(__m256i a, __m256i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_avg_epu16",
        "full_name": "__m256i _mm256_mask_avg_epu16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_avg_epu16",
        "full_name": "__m256i _mm256_maskz_avg_epu16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_avg_epu16",
        "full_name": "__m512i _mm512_avg_epu16(__m512i a, __m512i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_avg_epu16",
        "full_name": "__m512i _mm512_mask_avg_epu16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_avg_epu16",
        "full_name": "__m512i _mm512_maskz_avg_epu16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Average packed unsigned 16-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_avg_epu8",
        "full_name": "__m128i _mm_mask_avg_epu8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_avg_epu8",
        "full_name": "__m128i _mm_maskz_avg_epu8(__mmask16 k, __m128i a, __m128i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_avg_epu8",
        "full_name": "__m256i _mm256_avg_epu8(__m256i a, __m256i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_avg_epu8",
        "full_name": "__m256i _mm256_mask_avg_epu8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_avg_epu8",
        "full_name": "__m256i _mm256_maskz_avg_epu8(__mmask32 k, __m256i a, __m256i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_avg_epu8",
        "full_name": "__m512i _mm512_avg_epu8(__m512i a, __m512i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_avg_epu8",
        "full_name": "__m512i _mm512_mask_avg_epu8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_avg_epu8",
        "full_name": "__m512i _mm512_maskz_avg_epu8(__mmask64 k, __m512i a, __m512i b);",
        "description": "Average packed unsigned 8-bit integers in \"a\" and \"b\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_kshiftli_mask16",
        "full_name": "__mmask16 _kshiftli_mask16(__mmask16 a, unsigned int count);",
        "description": "Shift the bits of 16-bit mask \"a\" left by \"count\" while shifting in zeros, and store the least significant 16 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftli_mask32",
        "full_name": "__mmask32 _kshiftli_mask32(__mmask32 a, unsigned int count);",
        "description": "Shift the bits of 32-bit mask \"a\" left by \"count\" while shifting in zeros, and store the least significant 32 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftli_mask64",
        "full_name": "__mmask64 _kshiftli_mask64(__mmask64 a, unsigned int count);",
        "description": "Shift the bits of 64-bit mask \"a\" left by \"count\" while shifting in zeros, and store the least significant 64 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftli_mask8",
        "full_name": "__mmask8 _kshiftli_mask8(__mmask8 a, unsigned int count);",
        "description": "Shift the bits of 8-bit mask \"a\" left by \"count\" while shifting in zeros, and store the least significant 8 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftri_mask16",
        "full_name": "__mmask16 _kshiftri_mask16(__mmask16 a, unsigned int count);",
        "description": "Shift the bits of 16-bit mask \"a\" right by \"count\" while shifting in zeros, and store the least significant 16 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftri_mask32",
        "full_name": "__mmask32 _kshiftri_mask32(__mmask32 a, unsigned int count);",
        "description": "Shift the bits of 32-bit mask \"a\" right by \"count\" while shifting in zeros, and store the least significant 32 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftri_mask64",
        "full_name": "__mmask64 _kshiftri_mask64(__mmask64 a, unsigned int count);",
        "description": "Shift the bits of 64-bit mask \"a\" right by \"count\" while shifting in zeros, and store the least significant 64 bits of the result in \"k\"."
    },
    {
        "name": "_kshiftri_mask8",
        "full_name": "__mmask8 _kshiftri_mask8(__mmask8 a, unsigned int count);",
        "description": "Shift the bits of 8-bit mask \"a\" right by \"count\" while shifting in zeros, and store the least significant 8 bits of the result in \"k\"."
    },
    {
        "name": "_mm_mask_mulhrs_epi16",
        "full_name": "__m128i _mm_mask_mulhrs_epi16(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_mulhrs_epi16",
        "full_name": "__m128i _mm_maskz_mulhrs_epi16(__mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_mulhrs_epi16",
        "full_name": "__m256i _mm256_mask_mulhrs_epi16(__m256i src, __mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_mulhrs_epi16",
        "full_name": "__m256i _mm256_maskz_mulhrs_epi16(__mmask16 k, __m256i a, __m256i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_mulhrs_epi16",
        "full_name": "__m512i _mm512_mask_mulhrs_epi16(__m512i src, __mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_mulhrs_epi16",
        "full_name": "__m512i _mm512_maskz_mulhrs_epi16(__mmask32 k, __m512i a, __m512i b);",
        "description": "Multiply packed signed 16-bit integers in \"a\" and \"b\", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_shufflehi_epi16",
        "full_name": "__m128i _mm_mask_shufflehi_epi16(__m128i src, __mmask8 k, __m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of \"dst\", with the low 64 bits being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shufflehi_epi16",
        "full_name": "__m128i _mm_maskz_shufflehi_epi16(__mmask8 k, __m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of \"dst\", with the low 64 bits being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_shufflehi_epi16",
        "full_name": "__m256i _mm256_mask_shufflehi_epi16(__m256i src, __mmask16 k, __m256i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shufflehi_epi16",
        "full_name": "__m256i _mm256_maskz_shufflehi_epi16(__mmask16 k, __m256i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shufflehi_epi16",
        "full_name": "__m256i _mm256_shufflehi_epi16(__m256i a, const int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_mask_shufflehi_epi16",
        "full_name": "__m512i _mm512_mask_shufflehi_epi16(__m512i src, __mmask32 k, __m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shufflehi_epi16",
        "full_name": "__m512i _mm512_maskz_shufflehi_epi16(__mmask32 k, __m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shufflehi_epi16",
        "full_name": "__m512i _mm512_shufflehi_epi16(__m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the high 64 bits of 128-bit lanes of \"dst\", with the low 64 bits of 128-bit lanes being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm_mask_shufflelo_epi16",
        "full_name": "__m128i _mm_mask_shufflelo_epi16(__m128i src, __mmask8 k, __m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of \"dst\", with the high 64 bits being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_shufflelo_epi16",
        "full_name": "__m128i _mm_maskz_shufflelo_epi16(__mmask8 k, __m128i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of \"dst\", with the high 64 bits being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_shufflelo_epi16",
        "full_name": "__m256i _mm256_mask_shufflelo_epi16(__m256i src, __mmask16 k, __m256i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_shufflelo_epi16",
        "full_name": "__m256i _mm256_maskz_shufflelo_epi16(__mmask16 k, __m256i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_shufflelo_epi16",
        "full_name": "__m256i _mm256_shufflelo_epi16(__m256i a, const int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm512_mask_shufflelo_epi16",
        "full_name": "__m512i _mm512_mask_shufflelo_epi16(__m512i src, __mmask32 k, __m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_shufflelo_epi16",
        "full_name": "__m512i _mm512_maskz_shufflelo_epi16(__mmask32 k, __m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\", using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_shufflelo_epi16",
        "full_name": "__m512i _mm512_shufflelo_epi16(__m512i a, int imm8);",
        "description": "Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of \"a\" using the control in \"imm8\". Store the results in the low 64 bits of 128-bit lanes of \"dst\", with the high 64 bits of 128-bit lanes being copied from \"a\" to \"dst\"."
    },
    {
        "name": "_mm_sind_pd",
        "full_name": "__m128d _mm_sind_pd(__m128d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm_sind_ps",
        "full_name": "__m128 _mm_sind_ps(__m128 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm_tand_pd",
        "full_name": "__m128d _mm_tand_pd(__m128d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm_tand_ps",
        "full_name": "__m128 _mm_tand_ps(__m128 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm_testc_pd",
        "full_name": "int _mm_testc_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm256_testc_pd",
        "full_name": "int _mm256_testc_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm_testc_ps",
        "full_name": "int _mm_testc_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm256_testc_ps",
        "full_name": "int _mm256_testc_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm_testc_si128",
        "full_name": "int _mm_testc_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm256_testc_si256",
        "full_name": "int _mm256_testc_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of 256 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return the \"CF\" value."
    },
    {
        "name": "_mm_testnzc_pd",
        "full_name": "int _mm_testnzc_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm256_testnzc_pd",
        "full_name": "int _mm256_testnzc_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm_testnzc_ps",
        "full_name": "int _mm_testnzc_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm256_testnzc_ps",
        "full_name": "int _mm256_testnzc_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm_testnzc_si128",
        "full_name": "int _mm_testnzc_si128(__m128i a, __m128i b);",
        "description": "Compute the bitwise AND of 128 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm256_testnzc_si256",
        "full_name": "int _mm256_testnzc_si256(__m256i a, __m256i b);",
        "description": "Compute the bitwise AND of 256 bits (representing integer data) in \"a\" and \"b\", and set \"ZF\" to 1 if the result is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", and set \"CF\" to 1 if the result is zero, otherwise set \"CF\" to 0. Return 1 if both the \"ZF\" and \"CF\" values are zero, otherwise return 0."
    },
    {
        "name": "_mm_testz_pd",
        "full_name": "int _mm_testz_pd(__m128d a, __m128d b);",
        "description": "Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm256_testz_pd",
        "full_name": "int _mm256_testz_pd(__m256d a, __m256d b);",
        "description": "Compute the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm_testz_ps",
        "full_name": "int _mm_testz_ps(__m128 a, __m128 b);",
        "description": "Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 128-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm256_testz_ps",
        "full_name": "int _mm256_testz_ps(__m256 a, __m256 b);",
        "description": "Compute the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in \"a\" and \"b\", producing an intermediate 256-bit value, and set \"ZF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"ZF\" to 0. Compute the bitwise NOT of \"a\" and then AND with \"b\", producing an intermediate value, and set \"CF\" to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set \"CF\" to 0. Return the \"ZF\" value."
    },
    {
        "name": "_mm256_zeroall",
        "full_name": "void _mm256_zeroall(void);",
        "description": "Zero the contents of all XMM or YMM registers."
    },
    {
        "name": "_mm_csqrt_ps",
        "full_name": "__m128 _mm_csqrt_ps(__m128 a);",
        "description": "Compute the square root of packed complex snumbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm256_csqrt_ps",
        "full_name": "__m256 _mm256_csqrt_ps(__m256 a);",
        "description": "Compute the square root of packed complex snumbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm_erfinv_pd",
        "full_name": "__m128d _mm_erfinv_pd(__m128d a);",
        "description": "Compute the inverse error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_erfinv_ps",
        "full_name": "__m128 _mm_erfinv_ps(__m128 a);",
        "description": "Compute the inverse error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erfinv_pd",
        "full_name": "__m256d _mm256_erfinv_pd(__m256d a);",
        "description": "Compute the inverse error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erfinv_ps",
        "full_name": "__m256 _mm256_erfinv_ps(__m256 a);",
        "description": "Compute the inverse error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erfinv_pd",
        "full_name": "__m512d _mm512_erfinv_pd(__m512d a);",
        "description": "Compute the inverse error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_erfinv_pd",
        "full_name": "__m512d _mm512_mask_erfinv_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the inverse error function of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_erfinv_ps",
        "full_name": "__m512 _mm512_erfinv_ps(__m512 a);",
        "description": "Compute the inverse error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_erfinv_ps",
        "full_name": "__m512 _mm512_mask_erfinv_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cdfnorm_pd",
        "full_name": "__m128d _mm_cdfnorm_pd(__m128d a);",
        "description": "Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cdfnorm_ps",
        "full_name": "__m128 _mm_cdfnorm_ps(__m128 a);",
        "description": "Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cdfnorm_pd",
        "full_name": "__m256d _mm256_cdfnorm_pd(__m256d a);",
        "description": "Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cdfnorm_ps",
        "full_name": "__m256 _mm256_cdfnorm_ps(__m256 a);",
        "description": "Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cdfnorm_pd",
        "full_name": "__m512d _mm512_cdfnorm_pd(__m512d a);",
        "description": "Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cdfnorm_pd",
        "full_name": "__m512d _mm512_mask_cdfnorm_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the cumulative distribution function of packed double-precision (64-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_cdfnorm_ps",
        "full_name": "__m512 _mm512_cdfnorm_ps(__m512 a);",
        "description": "Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cdfnorm_ps",
        "full_name": "__m512 _mm512_mask_cdfnorm_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_rcp14_ps",
        "full_name": "__m128 _mm_rcp14_ps(__m128 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm_rcp14_pd",
        "full_name": "__m128d _mm_rcp14_pd(__m128d a);",
        "description": "Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 2^-14."
    },
    {
        "name": "_mm256_rcp_ps",
        "full_name": "__m256 _mm256_rcp_ps(__m256 a);",
        "description": "Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 1.5*2^-12."
    },
    {
        "name": "_mm_clmulepi64_si128",
        "full_name": "__m128i _mm_clmulepi64_si128(__m128i a, __m128i b, const int imm8);",
        "description": "Perform a carry-less multiplication of two 64-bit integers, selected from \"a\" and \"b\" according to \"imm8\", and store the results in \"dst\"."
    },
    {
        "name": "_cvtu32_mask16",
        "full_name": "__mmask16 _cvtu32_mask16(unsigned int a);",
        "description": "Convert integer value \"a\" into an 16-bit mask, and store the result in \"k\"."
    },
    {
        "name": "_cvtu32_mask32",
        "full_name": "__mmask32 _cvtu32_mask32(unsigned int a);",
        "description": "Convert integer value \"a\" into an 32-bit mask, and store the result in \"k\"."
    },
    {
        "name": "_cvtu32_mask8",
        "full_name": "__mmask8 _cvtu32_mask8(unsigned int a);",
        "description": "Convert integer value \"a\" into an 8-bit mask, and store the result in \"k\"."
    },
    {
        "name": "_mm_cvtu32_sd",
        "full_name": "__m128d _mm_cvtu32_sd(__m128d a, unsigned int b);",
        "description": "Convert the unsigned 32-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtu32_ss",
        "full_name": "__m128 _mm_cvtu32_ss(__m128 a, unsigned int b);",
        "description": "Convert the unsigned 32-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_cvtu64_mask64",
        "full_name": "__mmask64 _cvtu64_mask64(unsigned __int64 a);",
        "description": "Convert integer value \"a\" into an 64-bit mask, and store the result in \"k\"."
    },
    {
        "name": "_mm_cvtu64_sd",
        "full_name": "__m128d _mm_cvtu64_sd(__m128d a, unsigned __int64 b);",
        "description": "Convert the unsigned 64-bit integer \"b\" to a double-precision (64-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\"."
    },
    {
        "name": "_mm_cvtu64_ss",
        "full_name": "__m128 _mm_cvtu64_ss(__m128 a, unsigned __int64 b);",
        "description": "Convert the unsigned 64-bit integer \"b\" to a single-precision (32-bit) floating-point element, store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\"."
    },
    {
        "name": "_mm_mask_cvtusepi16_storeu_epi8",
        "full_name": "void _mm_mask_cvtusepi16_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi16_storeu_epi8",
        "full_name": "void _mm256_mask_cvtusepi16_storeu_epi8(void *base_addr, __mmask16 k, __m256i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi16_storeu_epi8",
        "full_name": "void _mm512_mask_cvtusepi16_storeu_epi8(void *base_addr, __mmask32 k, __m512i a);",
        "description": "Convert packed unsigned 16-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtusepi32_storeu_epi16",
        "full_name": "void _mm_mask_cvtusepi32_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi32_storeu_epi16",
        "full_name": "void _mm256_mask_cvtusepi32_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi32_storeu_epi16",
        "full_name": "void _mm512_mask_cvtusepi32_storeu_epi16(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtusepi32_storeu_epi8",
        "full_name": "void _mm_mask_cvtusepi32_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi32_storeu_epi8",
        "full_name": "void _mm256_mask_cvtusepi32_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi32_storeu_epi8",
        "full_name": "void _mm512_mask_cvtusepi32_storeu_epi8(void *base_addr, __mmask16 k, __m512i a);",
        "description": "Convert packed unsigned 32-bit integers in \"a\" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_storeu_epi16",
        "full_name": "void _mm_mask_cvtusepi64_storeu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_storeu_epi16",
        "full_name": "void _mm256_mask_cvtusepi64_storeu_epi16(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_storeu_epi16",
        "full_name": "void _mm512_mask_cvtusepi64_storeu_epi16(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_storeu_epi32",
        "full_name": "void _mm_mask_cvtusepi64_storeu_epi32(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_storeu_epi32",
        "full_name": "void _mm256_mask_cvtusepi64_storeu_epi32(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_storeu_epi32",
        "full_name": "void _mm512_mask_cvtusepi64_storeu_epi32(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_cvtusepi64_storeu_epi8",
        "full_name": "void _mm_mask_cvtusepi64_storeu_epi8(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_cvtusepi64_storeu_epi8",
        "full_name": "void _mm256_mask_cvtusepi64_storeu_epi8(void *base_addr, __mmask8 k, __m256i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_cvtusepi64_storeu_epi8",
        "full_name": "void _mm512_mask_cvtusepi64_storeu_epi8(void *base_addr, __mmask8 k, __m512i a);",
        "description": "Convert packed unsigned 64-bit integers in \"a\" to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_getmant_pd",
        "full_name": "__m128d _mm_getmant_pd(__m128d ma, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_maskz_getmant_pd",
        "full_name": "__m128d _mm_maskz_getmant_pd(__mmask8 k, __m128d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm256_getmant_pd",
        "full_name": "__m256d _mm256_getmant_pd(__m256d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm256_maskz_getmant_pd",
        "full_name": "__m256d _mm256_maskz_getmant_pd(__mmask8 k, __m256d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm512_getmant_pd",
        "full_name": "__m512d _mm512_getmant_pd(__m512d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm512_maskz_getmant_pd",
        "full_name": "__m512d _mm512_maskz_getmant_pd(__mmask8 k, __m512d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_getmant_ps",
        "full_name": "__m128 _mm_getmant_ps(__m128 ma, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_mask_getmant_ps",
        "full_name": "__m128 _mm_mask_getmant_ps(__m128 src, __mmask8 k, __m128 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_maskz_getmant_ps",
        "full_name": "__m128 _mm_maskz_getmant_ps(__mmask8 k, __m128 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm256_getmant_ps",
        "full_name": "__m256 _mm256_getmant_ps(__m256 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm256_maskz_getmant_ps",
        "full_name": "__m256 _mm256_maskz_getmant_ps(__mmask8 k, __m256 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm512_getmant_ps",
        "full_name": "__m512 _mm512_getmant_ps(__m512 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm512_maskz_getmant_ps",
        "full_name": "__m512 _mm512_maskz_getmant_ps(__mmask16 k, __m512 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm512_getmant_round_pd",
        "full_name": "__m512d _mm512_getmant_round_pd(__m512d a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc, int sae);",
        "description": "Normalize the mantissas of packed double-precision (64-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note][sae_note]"
    },
    {
        "name": "_mm512_getmant_round_ps",
        "full_name": "__m512 _mm512_getmant_round_ps(__m512 a, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc, int sae);",
        "description": "Normalize the mantissas of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note][sae_note]"
    },
    {
        "name": "_mm_getmant_sd",
        "full_name": "__m128d _mm_getmant_sd(__m128d ma, __m128d mb, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_getmant_round_sd",
        "full_name": "__m128d _mm_getmant_round_sd(__m128d ma, __m128d mb, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc, int sae);",
        "description": "Normalize the mantissas of the lower double-precision (64-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note][sae_note]"
    },
    {
        "name": "_mm_getmant_ss",
        "full_name": "__m128 _mm_getmant_ss(__m128 ma, __m128 mb, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_maskz_getmant_ss",
        "full_name": "__m128 _mm_maskz_getmant_ss(__mmask8 k, __m128 a, __m128 b, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc);",
        "description": "Normalize the mantissas of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note]"
    },
    {
        "name": "_mm_getmant_round_ss",
        "full_name": "__m128 _mm_getmant_round_ss(__m128 ma, __m128 mb, _MM_MANTISSA_NORM_ENUM interv, _MM_MANTISSA_SIGN_ENUM sc, int sae);",
        "description": "Normalize the mantissas of the lower single-precision (32-bit) floating-point element in \"b\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". This intrinsic essentially calculates \"±(2^k)*|x.significand|\", where \"k\" depends on the interval range defined by \"interv\" and the sign depends on \"sc\" and the source sign.\n\t[getmant_note][sae_note]"
    },
    {
        "name": "_mm_dbsad_epu8",
        "full_name": "__m128i _mm_dbsad_epu8(__m128i a, __m128i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\".\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm_mask_dbsad_epu8",
        "full_name": "__m128i _mm_mask_dbsad_epu8(__m128i src, __mmask8 k, __m128i a, __m128i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm_maskz_dbsad_epu8",
        "full_name": "__m128i _mm_maskz_dbsad_epu8(__mmask8 k, __m128i a, __m128i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm256_dbsad_epu8",
        "full_name": "__m256i _mm256_dbsad_epu8(__m256i a, __m256i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\".\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm256_mask_dbsad_epu8",
        "full_name": "__m256i _mm256_mask_dbsad_epu8(__m256i src, __mmask16 k, __m256i a, __m256i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm256_maskz_dbsad_epu8",
        "full_name": "__m256i _mm256_maskz_dbsad_epu8(__mmask16 k, __m256i a, __m256i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm512_dbsad_epu8",
        "full_name": "__m512i _mm512_dbsad_epu8(__m512i a, __m512i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\".\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm512_mask_dbsad_epu8",
        "full_name": "__m512i _mm512_mask_dbsad_epu8(__m512i src, __mmask32 k, __m512i a, __m512i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm512_maskz_dbsad_epu8",
        "full_name": "__m512i _mm512_maskz_dbsad_epu8(__mmask32 k, __m512i a, __m512i b, int imm8);",
        "description": "Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in \"a\" compared to those in \"b\", and store the 16-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set).\n\tFour SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from \"a\", and the last two SADs use the uppper 8-bit quadruplet of the lane from \"a\". Quadruplets from \"b\" are selected from within 128-bit lanes according to the control in \"imm8\", and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets."
    },
    {
        "name": "_mm_cexp_ps",
        "full_name": "__m128 _mm_cexp_ps(__m128 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed complex numbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm256_cexp_ps",
        "full_name": "__m256 _mm256_cexp_ps(__m256 a);",
        "description": "Compute the exponential value of \"e\" raised to the power of packed complex numbers in \"a\", and store the complex results in \"dst\". Each complex number is composed of two adjacent single-precision (32-bit) floating-point elements, which defines the complex number \"complex = vec.fp32[0] + i * vec.fp32[1]\"."
    },
    {
        "name": "_mm_cosd_pd",
        "full_name": "__m128d _mm_cosd_pd(__m128d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cosd_pd",
        "full_name": "__m256d _mm256_cosd_pd(__m256d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cosd_pd",
        "full_name": "__m512d _mm512_cosd_pd(__m512d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cosd_pd",
        "full_name": "__m512d _mm512_mask_cosd_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the cosine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cosd_ps",
        "full_name": "__m128 _mm_cosd_ps(__m128 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cosd_ps",
        "full_name": "__m256 _mm256_cosd_ps(__m256 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cosd_ps",
        "full_name": "__m512 _mm512_cosd_ps(__m512 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cosd_ps",
        "full_name": "__m512 _mm512_mask_cosd_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the cosine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_fixupimm_pd",
        "full_name": "__m128d _mm_fixupimm_pd(__m128d a, __m128d b, __m128i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_mask_fixupimm_pd",
        "full_name": "__m128d _mm_mask_fixupimm_pd(__m128d a, __mmask8 k, __m128d b, __m128i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_maskz_fixupimm_pd",
        "full_name": "__m128d _mm_maskz_fixupimm_pd(__mmask8 k, __m128d a, __m128d b, __m128i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_fixupimm_pd",
        "full_name": "__m256d _mm256_fixupimm_pd(__m256d a, __m256d b, __m256i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_mask_fixupimm_pd",
        "full_name": "__m256d _mm256_mask_fixupimm_pd(__m256d a, __mmask8 k, __m256d b, __m256i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_maskz_fixupimm_pd",
        "full_name": "__m256d _mm256_maskz_fixupimm_pd(__mmask8 k, __m256d a, __m256d b, __m256i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_fixupimm_pd",
        "full_name": "__m512d _mm512_fixupimm_pd(__m512d a, __m512d b, __m512i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_mask_fixupimm_pd",
        "full_name": "__m512d _mm512_mask_fixupimm_pd(__m512d a, __mmask8 k, __m512d b, __m512i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_maskz_fixupimm_pd",
        "full_name": "__m512d _mm512_maskz_fixupimm_pd(__mmask8 k, __m512d a, __m512d b, __m512i c, int imm8);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_fixupimm_ps",
        "full_name": "__m128 _mm_fixupimm_ps(__m128 a, __m128 b, __m128i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_mask_fixupimm_ps",
        "full_name": "__m128 _mm_mask_fixupimm_ps(__m128 a, __mmask8 k, __m128 b, __m128i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_maskz_fixupimm_ps",
        "full_name": "__m128 _mm_maskz_fixupimm_ps(__mmask8 k, __m128 a, __m128 b, __m128i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_fixupimm_ps",
        "full_name": "__m256 _mm256_fixupimm_ps(__m256 a, __m256 b, __m256i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_mask_fixupimm_ps",
        "full_name": "__m256 _mm256_mask_fixupimm_ps(__m256 a, __mmask8 k, __m256 b, __m256i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm256_maskz_fixupimm_ps",
        "full_name": "__m256 _mm256_maskz_fixupimm_ps(__mmask8 k, __m256 a, __m256 b, __m256i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_fixupimm_ps",
        "full_name": "__m512 _mm512_fixupimm_ps(__m512 a, __m512 b, __m512i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_mask_fixupimm_ps",
        "full_name": "__m512 _mm512_mask_fixupimm_ps(__m512 a, __mmask16 k, __m512 b, __m512i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_maskz_fixupimm_ps",
        "full_name": "__m512 _mm512_maskz_fixupimm_ps(__mmask16 k, __m512 a, __m512 b, __m512i c, int imm8);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm512_fixupimm_round_pd",
        "full_name": "__m512d _mm512_fixupimm_round_pd(__m512d a, __m512d b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm512_mask_fixupimm_round_pd",
        "full_name": "__m512d _mm512_mask_fixupimm_round_pd(__m512d a, __mmask8 k, __m512d b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm512_maskz_fixupimm_round_pd",
        "full_name": "__m512d _mm512_maskz_fixupimm_round_pd(__mmask8 k, __m512d a, __m512d b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed double-precision (64-bit) floating-point elements in \"a\" and \"b\" using packed 64-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm512_fixupimm_round_ps",
        "full_name": "__m512 _mm512_fixupimm_round_ps(__m512 a, __m512 b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm512_mask_fixupimm_round_ps",
        "full_name": "__m512 _mm512_mask_fixupimm_round_ps(__m512 a, __mmask16 k, __m512 b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm512_maskz_fixupimm_round_ps",
        "full_name": "__m512 _mm512_maskz_fixupimm_round_ps(__mmask16 k, __m512 a, __m512 b, __m512i c, int imm8, int sae);",
        "description": "Fix up packed single-precision (32-bit) floating-point elements in \"a\" and \"b\" using packed 32-bit integers in \"c\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set). \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_fixupimm_sd",
        "full_name": "__m128d _mm_fixupimm_sd(__m128d a, __m128d b, __m128i c, int imm8);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_mask_fixupimm_sd",
        "full_name": "__m128d _mm_mask_fixupimm_sd(__m128d a, __mmask8 k, __m128d b, __m128i c, int imm8);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_maskz_fixupimm_sd",
        "full_name": "__m128d _mm_maskz_fixupimm_sd(__mmask8 k, __m128d a, __m128d b, __m128i c, int imm8);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_fixupimm_round_sd",
        "full_name": "__m128d _mm_fixupimm_round_sd(__m128d a, __m128d b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\", and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_mask_fixupimm_round_sd",
        "full_name": "__m128d _mm_mask_fixupimm_round_sd(__m128d a, __mmask8 k, __m128d b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_maskz_fixupimm_round_sd",
        "full_name": "__m128d _mm_maskz_fixupimm_round_sd(__mmask8 k, __m128d a, __m128d b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower double-precision (64-bit) floating-point elements in \"a\" and \"b\" using the lower 64-bit integer in \"c\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper element from \"a\" to the upper element of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_fixupimm_ss",
        "full_name": "__m128 _mm_fixupimm_ss(__m128 a, __m128 b, __m128i c, int imm8);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_mask_fixupimm_ss",
        "full_name": "__m128 _mm_mask_fixupimm_ss(__m128 a, __mmask8 k, __m128 b, __m128i c, int imm8);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_maskz_fixupimm_ss",
        "full_name": "__m128 _mm_maskz_fixupimm_ss(__mmask8 k, __m128 a, __m128 b, __m128i c, int imm8);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting."
    },
    {
        "name": "_mm_fixupimm_round_ss",
        "full_name": "__m128 _mm_fixupimm_round_ss(__m128 a, __m128 b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\", and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_mask_fixupimm_round_ss",
        "full_name": "__m128 _mm_mask_fixupimm_round_ss(__m128 a, __mmask8 k, __m128 b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\" using writemask \"k\" (the element is copied from \"a\" when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_mm_maskz_fixupimm_round_ss",
        "full_name": "__m128 _mm_maskz_fixupimm_round_ss(__mmask8 k, __m128 a, __m128 b, __m128i c, int imm8, int sae);",
        "description": "Fix up the lower single-precision (32-bit) floating-point elements in \"a\" and \"b\" using the lower 32-bit integer in \"c\", store the result in the lower element of \"dst\" using zeromask \"k\" (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from \"a\" to the upper elements of \"dst\". \"imm8\" is used to set the required flags reporting.\n\t[sae_note]"
    },
    {
        "name": "_cvtsh_ss",
        "full_name": "float _cvtsh_ss(unsigned short a);",
        "description": "Convert the half-precision (16-bit) floating-point value \"a\" to a single-precision (32-bit) floating-point value, and store the result in \"dst\"."
    },
    {
        "name": "_cvtss_sh",
        "full_name": "unsigned short _cvtss_sh(float a, int rounding);",
        "description": "Convert the single-precision (32-bit) floating-point value \"a\" to a half-precision (16-bit) floating-point value, and store the result in \"dst\".\n\t[round_note]"
    },
    {
        "name": "_mm256_sind_pd",
        "full_name": "__m256d _mm256_sind_pd(__m256d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_sind_ps",
        "full_name": "__m256 _mm256_sind_ps(__m256 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_sind_pd",
        "full_name": "__m512d _mm512_sind_pd(__m512d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sind_pd",
        "full_name": "__m512d _mm512_mask_sind_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the sine of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_sind_ps",
        "full_name": "__m512 _mm512_sind_ps(__m512 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_sind_ps",
        "full_name": "__m512 _mm512_mask_sind_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the sine of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_tand_pd",
        "full_name": "__m256d _mm256_tand_pd(__m256d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_tand_ps",
        "full_name": "__m256 _mm256_tand_ps(__m256 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_tand_pd",
        "full_name": "__m512d _mm512_tand_pd(__m512d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tand_pd",
        "full_name": "__m512d _mm512_mask_tand_pd(__m512d src, __mmask8 k, __m512d a);",
        "description": "Compute the tangent of packed double-precision (64-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_tand_ps",
        "full_name": "__m512 _mm512_tand_ps(__m512 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_tand_ps",
        "full_name": "__m512 _mm512_mask_tand_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the tangent of packed single-precision (32-bit) floating-point elements in \"a\" expressed in degrees, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_erfcinv_ps",
        "full_name": "__m128 _mm_erfcinv_ps(__m128 a);",
        "description": "Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_erfcinv_ps",
        "full_name": "__m256 _mm256_erfcinv_ps(__m256 a);",
        "description": "Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_erfcinv_ps",
        "full_name": "__m512 _mm512_erfcinv_ps(__m512 a);",
        "description": "Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_erfcinv_ps",
        "full_name": "__m512 _mm512_mask_erfcinv_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse complementary error function of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_cdfnorminv_ps",
        "full_name": "__m128 _mm_cdfnorminv_ps(__m128 a);",
        "description": "Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm256_cdfnorminv_ps",
        "full_name": "__m256 _mm256_cdfnorminv_ps(__m256 a);",
        "description": "Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_cdfnorminv_ps",
        "full_name": "__m512 _mm512_cdfnorminv_ps(__m512 a);",
        "description": "Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_cdfnorminv_ps",
        "full_name": "__m512 _mm512_mask_cdfnorminv_ps(__m512 src, __mmask16 k, __m512 a);",
        "description": "Compute the inverse cumulative distribution function of packed single-precision (32-bit) floating-point elements in \"a\" using the normal distribution, and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_bittestandreset",
        "full_name": "unsigned char _bittestandreset(__int32 *a, __int32 b);",
        "description": "Return the bit at index \"b\" of 32-bit integer \"a\", and set that bit to zero."
    },
    {
        "name": "_bittestandreset64",
        "full_name": "unsigned char _bittestandreset64(__int64 *a, __int64 b);",
        "description": "Return the bit at index \"b\" of 64-bit integer \"a\", and set that bit to zero."
    },
    {
        "name": "_bittestandset",
        "full_name": "unsigned char _bittestandset(__int32 *a, __int32 b);",
        "description": "Return the bit at index \"b\" of 32-bit integer \"a\", and set that bit to one."
    },
    {
        "name": "_bittestandset64",
        "full_name": "unsigned char _bittestandset64(__int64 *a, __int64 b);",
        "description": "Return the bit at index \"b\" of 64-bit integer \"a\", and set that bit to one."
    },
    {
        "name": "_bextr2_u32",
        "full_name": "unsigned int _bextr2_u32(unsigned int a, unsigned int control);",
        "description": "Extract contiguous bits from unsigned 32-bit integer \"a\", and store the result in \"dst\". Extract the number of bits specified by bits 15:8 of \"control\", starting at the bit specified by bits 0:7 of \"control\"."
    },
    {
        "name": "_bextr2_u64",
        "full_name": "unsigned __int64 _bextr2_u64(unsigned __int64 a, unsigned __int64 control);",
        "description": "Extract contiguous bits from unsigned 64-bit integer \"a\", and store the result in \"dst\". Extract the number of bits specified by bits 15:8 of \"control\", starting at the bit specified by bits 0:7 of \"control\".."
    },
    {
        "name": "_mm_cvtps_pi16",
        "full_name": "__m64 _mm_cvtps_pi16(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 16-bit integers, and store the results in \"dst\". Note: this intrinsic will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and 0x7FFFFFFF."
    },
    {
        "name": "_mm_cvtps_pi32",
        "full_name": "__m64 _mm_cvtps_pi32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvtps_pi8",
        "full_name": "__m64 _mm_cvtps_pi8(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 8-bit integers, and store the results in lower 4 elements of \"dst\". Note: this intrinsic will generate 0x7F, rather than 0x80, for input values between 0x7F and 0x7FFFFFFF."
    },
    {
        "name": "_mm_cvtsd_i32",
        "full_name": "int _mm_cvtsd_i32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvtsd_si32",
        "full_name": "int _mm_cvtsd_si32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttps_pi32",
        "full_name": "__m64 _mm_cvttps_pi32(__m128 a);",
        "description": "Convert packed single-precision (32-bit) floating-point elements in \"a\" to packed 32-bit integers with truncation, and store the results in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_i32",
        "full_name": "int _mm_cvttsd_i32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_i64",
        "full_name": "__int64 _mm_cvttsd_i64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_si32",
        "full_name": "int _mm_cvttsd_si32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_si64",
        "full_name": "__int64 _mm_cvttsd_si64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_si64x",
        "full_name": "__int64 _mm_cvttsd_si64x(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_u32",
        "full_name": "unsigned int _mm_cvttsd_u32(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttsd_u64",
        "full_name": "unsigned __int64 _mm_cvttsd_u64(__m128d a);",
        "description": "Convert the lower double-precision (64-bit) floating-point element in \"a\" to an unsigned 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_i32",
        "full_name": "int _mm_cvttss_i32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_i64",
        "full_name": "__int64 _mm_cvttss_i64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_si32",
        "full_name": "int _mm_cvttss_si32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_si64",
        "full_name": "__int64 _mm_cvttss_si64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to a 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_u32",
        "full_name": "unsigned int _mm_cvttss_u32(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 32-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm_cvttss_u64",
        "full_name": "unsigned __int64 _mm_cvttss_u64(__m128 a);",
        "description": "Convert the lower single-precision (32-bit) floating-point element in \"a\" to an unsigned 64-bit integer with truncation, and store the result in \"dst\"."
    },
    {
        "name": "_mm512_i32extgather_ps",
        "full_name": "__m512 _mm512_i32extgather_ps(__m512i vindex, void const * base_addr, _MM_UPCONV_PS_ENUM conv, int scale, int hint);",
        "description": "Up-converts 16 memory locations starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\" using \"conv\" to single-precision (32-bit) floating-point elements and stores them in \"dst\". AVX512 only supports _MM_UPCONV_PS_NONE."
    },
    {
        "name": "_mm_i32scatter_pd",
        "full_name": "void _mm_i32scatter_pd(void *base_addr, __m128i vindex, __m128d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_mask_i32scatter_pd",
        "full_name": "void _mm_mask_i32scatter_pd(void *base_addr, __mmask8 k, __m128i vindex, __m128d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_i32scatter_pd",
        "full_name": "void _mm256_i32scatter_pd(void *base_addr, __m128i vindex, __m256d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm256_mask_i32scatter_pd",
        "full_name": "void _mm256_mask_i32scatter_pd(void *base_addr, __mmask8 k, __m128i vindex, __m256d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_i32scatter_pd",
        "full_name": "void _mm512_i32scatter_pd(void *base_addr, __m256i vindex, __m512d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_i32scatter_pd",
        "full_name": "void _mm512_mask_i32scatter_pd(void *base_addr, __mmask8 k, __m256i vindex, __m512d a, const int scale);",
        "description": "Scatter double-precision (64-bit) floating-point elements from \"a\" into memory using 32-bit indices. 64-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not stored when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_i32scatter_ps",
        "full_name": "void _mm_i32scatter_ps(void *base_addr, __m128i vindex, __m128 a, const int scale);",
        "description": "Scatter single-precision (32-bit) floating-point elements from \"a\" into memory using 32-bit indices. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm_rsqrt_ps",
        "full_name": "__m128 _mm_rsqrt_ps(__m128 a);",
        "description": "Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in \"a\", and store the results in \"dst\". The maximum relative error for this approximation is less than 1.5*2^-12."
    },
    {
        "name": "_mm512_prefetch_i64scatter_ps",
        "full_name": "void _mm512_prefetch_i64scatter_ps(void *base_addr, __m512i vindex, int scale, int hint);",
        "description": "Prefetch single-precision (32-bit) floating-point elements with intent to write into memory using 64-bit indices. Elements are prefetched into cache level \"hint\", where \"hint\" is 0 or 1. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_prefetch_i64scatter_pd",
        "full_name": "void _mm512_prefetch_i64scatter_pd(void *base_addr, __m512i vindex, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements with intent to write into memory using 64-bit indices. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_prefetch_i64gather_ps",
        "full_name": "void _mm512_prefetch_i64gather_ps(__m512i vindex, void const * base_addr, int scale, int hint);",
        "description": "Prefetch single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged in cache. \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_prefetch_i64gather_pd",
        "full_name": "void _mm512_prefetch_i64gather_pd(__m512i vindex, void const * base_addr, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements from memory into cache level specified by \"hint\" using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_prefetch_i32scatter_ps",
        "full_name": "void _mm512_prefetch_i32scatter_ps(void *base_addr, __m512i vindex, int scale, int hint);",
        "description": "Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\". The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_prefetch_i32scatter_pd",
        "full_name": "void _mm512_prefetch_i32scatter_pd(void *base_addr, __m256i vindex, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements with intent to write using 32-bit indices. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_prefetch_i32gather_ps",
        "full_name": "void _mm512_prefetch_i32gather_ps(__m512i vindex, void const * base_addr, int scale, int hint);",
        "description": "Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\". The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_prefetch_i32gather_pd",
        "full_name": "void _mm512_prefetch_i32gather_pd(__m256i vindex, void const * base_addr, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged in cache. \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_prefetch_i32extscatter_ps",
        "full_name": "void _mm512_prefetch_i32extscatter_ps(void *base_addr, __m512i vindex, int conv, int scale, int hint);",
        "description": "Prefetches a set of 16 single-precision (32-bit) memory locations pointed by base address \"base_addr\" and 32-bit integer index vector \"vindex\" with scale \"scale\" to L1 or L2 level of cache depending on the value of \"hint\", with a request for exclusive ownership. The \"hint\" parameter may be one of the following: _MM_HINT_T0 = 1 for prefetching to L1 cache, _MM_HINT_T1 = 2 for prefetching to L2 cache, _MM_HINT_T2 = 3 for prefetching to L2 cache non-temporal, _MM_HINT_NTA = 0 for prefetching to L1 cache non-temporal. The \"conv\" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the \"conv\" parameter specified for the subsequent scatter intrinsic."
    },
    {
        "name": "_mm512_prefetch_i32extgather_ps",
        "full_name": "void _mm512_prefetch_i32extgather_ps(__m512i vindex, void const * base_addr, int scale, int hint);",
        "description": "Prefetches a set of 16 single-precision (32-bit) memory locations pointed by base address \"base_addr\" and 32-bit integer index vector \"vindex\" with scale \"scale\" to L1 or L2 level of cache depending on the value of \"hint\". The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.\nThe \"conv\" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the \"conv\" parameter specified for the subsequent gather intrinsic."
    },
    {
        "name": "_mm512_popcnt_epi8",
        "full_name": "__m512i _mm512_popcnt_epi8(__m512i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_popcnt_epi64",
        "full_name": "__m512i _mm512_popcnt_epi64(__m512i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_popcnt_epi32",
        "full_name": "__m512i _mm512_popcnt_epi32(__m512i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_popcnt_epi16",
        "full_name": "__m512i _mm512_popcnt_epi16(__m512i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_popcnt_epi8",
        "full_name": "__m512i _mm512_maskz_popcnt_epi8(__mmask64 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_popcnt_epi64",
        "full_name": "__m512i _mm512_maskz_popcnt_epi64(__mmask8 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_popcnt_epi32",
        "full_name": "__m512i _mm512_maskz_popcnt_epi32(__mmask16 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_popcnt_epi16",
        "full_name": "__m512i _mm512_maskz_popcnt_epi16(__mmask32 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_gf2p8affineinv_epi64_epi8",
        "full_name": "__m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 k, __m512i x, __m512i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_gf2p8affine_epi64_epi8",
        "full_name": "__m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 k, __m512i x, __m512i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_epi8",
        "full_name": "__m512i _mm512_maskz_expandloadu_epi8(__mmask64 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expandloadu_epi16",
        "full_name": "__m512i _mm512_maskz_expandloadu_epi16(__mmask32 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_prefetch_i64scatter_ps",
        "full_name": "void _mm512_mask_prefetch_i64scatter_ps(void *base_addr, __mmask16 k, __m512i vindex, int scale, int hint);",
        "description": "Prefetch single-precision (32-bit) floating-point elements with intent to write into memory using 64-bit indices. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 32-bit elements are stored at addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not brought into cache when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_prefetch_i64scatter_pd",
        "full_name": "void _mm512_mask_prefetch_i64scatter_pd(void *base_addr, __mmask8 k, __m512i vindex, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements with intent to write into memory using 64-bit indices. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not brought into cache when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_prefetch_i64gather_ps",
        "full_name": "void _mm512_mask_prefetch_i64gather_ps(__m512i vindex, __mmask8 k, void const * base_addr, int scale, int hint);",
        "description": "Prefetch single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged in cache using writemask \"k\" (elements are only brought into cache when their corresponding mask bit is set). \"scale\" should be 1, 2, 4 or 8.. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_mask_prefetch_i64gather_pd",
        "full_name": "void _mm512_mask_prefetch_i64gather_pd(__m512i vindex, __mmask8 k, void const * base_addr, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements from memory into cache level specified by \"hint\" using 64-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 64-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Prefetched elements are merged in cache using writemask \"k\" (elements are copied from memory when the corresponding mask bit is set). \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_mask_prefetch_i32scatter_ps",
        "full_name": "void _mm512_mask_prefetch_i32scatter_ps(void *base_addr, __mmask16 k, __m512i vindex, int scale, int hint);",
        "description": "Prefetches 16 single-precision (32-bit) floating-point elements in memory starting at location \"base_addr\" at packed 32-bit integer indices stored in \"vindex\" scaled by \"scale\". The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. Only those elements whose corresponding mask bit in \"k\" is set are loaded into the desired cache."
    },
    {
        "name": "_mm512_mask_prefetch_i32scatter_pd",
        "full_name": "void _mm512_mask_prefetch_i32scatter_pd(void *base_addr, __mmask8 k, __m256i vindex, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements with intent to write using 32-bit indices. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache. 64-bit elements are brought into cache from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\") subject to mask \"k\" (elements are not brought into cache when the corresponding mask bit is not set). \"scale\" should be 1, 2, 4 or 8."
    },
    {
        "name": "_mm512_mask_prefetch_i32gather_ps",
        "full_name": "void _mm512_mask_prefetch_i32gather_ps(__m512i vindex, __mmask16 k, void const * base_addr, int scale, int hint);",
        "description": "Prefetch single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged in cache using writemask \"k\" (elements are brought into cache only when their corresponding mask bits are set). \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_mask_prefetch_i32gather_pd",
        "full_name": "void _mm512_mask_prefetch_i32gather_pd(__m512i vindex, __mmask16 k, void const * base_addr, int scale, int hint);",
        "description": "Prefetch double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at \"base_addr\" and offset by each 32-bit element in \"vindex\" (each index is scaled by the factor in \"scale\"). Gathered elements are merged in cache using writemask \"k\" (elements are brought into cache only when their corresponding mask bits are set). \"scale\" should be 1, 2, 4 or 8. The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache."
    },
    {
        "name": "_mm512_mask_prefetch_i32extscatter_ps",
        "full_name": "void _mm512_mask_prefetch_i32extscatter_ps(void *base_addr, __m512i vindex, int conv, __mmask16 k, int scale, int hint);",
        "description": "Prefetches a set of 16 single-precision (32-bit) memory locations pointed by base address \"base_addr\" and 32-bit integer index vector \"vindex\" with scale \"scale\" to L1 or L2 level of cache depending on the value of \"hint\". The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.\nThe \"conv\" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the \"conv\" parameter specified for the subsequent gather intrinsic. Only those elements whose corresponding mask bit in \"k\" is set are loaded into cache."
    },
    {
        "name": "_mm512_mask_prefetch_i32extgather_ps",
        "full_name": "void _mm512_mask_prefetch_i32extgather_ps(__m512i vindex, __mmask16 k, void const * base_addr, int scale, int hint);",
        "description": "Prefetches a set of 16 single-precision (32-bit) memory locations pointed by base address \"base_addr\" and 32-bit integer index vector \"vindex\" with scale \"scale\" to L1 or L2 level of cache depending on the value of \"hint\". Gathered elements are merged in cache using writemask \"k\" (elements are brought into cache only when their corresponding mask bits are set). The \"hint\" parameter may be 1 (_MM_HINT_T0) for prefetching to L1 cache, or 2 (_MM_HINT_T1) for prefetching to L2 cache.\nThe \"conv\" parameter specifies the granularity used by compilers to better encode the instruction. It should be the same as the \"conv\" parameter specified for the subsequent gather intrinsic."
    },
    {
        "name": "_mm512_mask_popcnt_epi8",
        "full_name": "__m512i _mm512_mask_popcnt_epi8(__m512i src, __mmask64 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_popcnt_epi64",
        "full_name": "__m512i _mm512_mask_popcnt_epi64(__m512i src, __mmask8 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_popcnt_epi32",
        "full_name": "__m512i _mm512_mask_popcnt_epi32(__m512i src, __mmask16 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_popcnt_epi16",
        "full_name": "__m512i _mm512_mask_popcnt_epi16(__m512i src, __mmask32 k, __m512i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_gf2p8affineinv_epi64_epi8",
        "full_name": "__m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i src, __mmask64 k, __m512i x, __m512i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_gf2p8affine_epi64_epi8",
        "full_name": "__m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i src, __mmask64 k, __m512i x, __m512i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_epi8",
        "full_name": "__m512i _mm512_mask_expandloadu_epi8(__m512i src, __mmask64 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expandloadu_epi16",
        "full_name": "__m512i _mm512_mask_expandloadu_epi16(__m512i src, __mmask32 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_dpwssd_epi32",
        "full_name": "__m512i _mm512_mask_dpwssd_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_compressstoreu_epi8",
        "full_name": "void _mm512_mask_compressstoreu_epi8(void *base_addr, __mmask64 k, __m512i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_compressstoreu_epi16",
        "full_name": "void _mm512_mask_compressstoreu_epi16(void *base_addr, __mmask32 k, __m512i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm512_mask_bitshuffle_epi64_mask",
        "full_name": "__mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 k, __m512i b, __m512i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_gf2p8affineinv_epi64_epi8",
        "full_name": "__m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i x, __m512i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm512_gf2p8affine_epi64_epi8",
        "full_name": "__m512i _mm512_gf2p8affine_epi64_epi8(__m512i x, __m512i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm512_dpwssd_epi32",
        "full_name": "__m512i _mm512_dpwssd_epi32(__m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm512_clmulepi64_epi128",
        "full_name": "__m512i _mm512_clmulepi64_epi128(__m512i b, __m512i c, const int Imm8);",
        "description": "Carry-less multiplication of one quadword of \"b\" by one quadword of \"c\", stores the 128-bit result in \"dst\". The immediate \"Imm8\" is used to determine which quadwords of \"b\" and \"c\" should be used."
    },
    {
        "name": "_mm512_bitshuffle_epi64_mask",
        "full_name": "__mmask64 _mm512_bitshuffle_epi64_mask(__m512i b, __m512i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\"."
    },
    {
        "name": "_mm512_2intersect_epi64",
        "full_name": "void _mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *k1, __mmask8 *k2);",
        "description": "Compute intersection of packed 64-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm512_2intersect_epi32",
        "full_name": "void _mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *k1, __mmask16 *k2);",
        "description": "Compute intersection of packed 32-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm256_popcnt_epi8",
        "full_name": "__m256i _mm256_popcnt_epi8(__m256i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_popcnt_epi64",
        "full_name": "__m256i _mm256_popcnt_epi64(__m256i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_popcnt_epi32",
        "full_name": "__m256i _mm256_popcnt_epi32(__m256i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_popcnt_epi16",
        "full_name": "__m256i _mm256_popcnt_epi16(__m256i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_maskz_popcnt_epi8",
        "full_name": "__m256i _mm256_maskz_popcnt_epi8(__mmask32 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_popcnt_epi64",
        "full_name": "__m256i _mm256_maskz_popcnt_epi64(__mmask8 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_popcnt_epi32",
        "full_name": "__m256i _mm256_maskz_popcnt_epi32(__mmask8 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_popcnt_epi16",
        "full_name": "__m256i _mm256_maskz_popcnt_epi16(__mmask16 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_gf2p8affineinv_epi64_epi8",
        "full_name": "__m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 k, __m256i x, __m256i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_gf2p8affine_epi64_epi8",
        "full_name": "__m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 k, __m256i x, __m256i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_epi8",
        "full_name": "__m256i _mm256_maskz_expandloadu_epi8(__mmask32 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expandloadu_epi16",
        "full_name": "__m256i _mm256_maskz_expandloadu_epi16(__mmask16 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_dpwssd_epi32",
        "full_name": "__m256i _mm256_maskz_dpwssd_epi32(__mmask8 k, __m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_dpbusd_epi32",
        "full_name": "__m256i _mm256_maskz_dpbusd_epi32(__mmask8 k, __m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_dpbusd_epi32",
        "full_name": "__m512i _mm512_dpbusd_epi32(__m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm512_maskz_dpwssd_epi32",
        "full_name": "__m512i _mm512_maskz_dpwssd_epi32(__mmask16 k, __m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_dpbusd_epi32",
        "full_name": "__m512i _mm512_maskz_dpbusd_epi32(__mmask16 k, __m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_dpbusd_epi32",
        "full_name": "__m128i _mm_maskz_dpbusd_epi32(__mmask8 k, __m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_dpbusd_epi32",
        "full_name": "__m512i _mm512_mask_dpbusd_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_popcnt_epi8",
        "full_name": "__m256i _mm256_mask_popcnt_epi8(__m256i src, __mmask32 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_popcnt_epi64",
        "full_name": "__m256i _mm256_mask_popcnt_epi64(__m256i src, __mmask8 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_popcnt_epi32",
        "full_name": "__m256i _mm256_mask_popcnt_epi32(__m256i src, __mmask8 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_popcnt_epi16",
        "full_name": "__m256i _mm256_mask_popcnt_epi16(__m256i src, __mmask16 k, __m256i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_gf2p8affineinv_epi64_epi8",
        "full_name": "__m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i src, __mmask32 k, __m256i x, __m256i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_gf2p8affine_epi64_epi8",
        "full_name": "__m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i src, __mmask32 k, __m256i x, __m256i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_epi8",
        "full_name": "__m256i _mm256_mask_expandloadu_epi8(__m256i src, __mmask32 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expandloadu_epi16",
        "full_name": "__m256i _mm256_mask_expandloadu_epi16(__m256i src, __mmask16 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_dpwssd_epi32",
        "full_name": "__m256i _mm256_mask_dpwssd_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_dpbusd_epi32",
        "full_name": "__m256i _mm256_mask_dpbusd_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_compressstoreu_epi8",
        "full_name": "void _mm256_mask_compressstoreu_epi8(void *base_addr, __mmask32 k, __m256i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_compressstoreu_epi16",
        "full_name": "void _mm256_mask_compressstoreu_epi16(void *base_addr, __mmask16 k, __m256i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm256_mask_bitshuffle_epi64_mask",
        "full_name": "__mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 k, __m256i b, __m256i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_gf2p8affineinv_epi64_epi8",
        "full_name": "__m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i x, __m256i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm256_gf2p8affine_epi64_epi8",
        "full_name": "__m256i _mm256_gf2p8affine_epi64_epi8(__m256i x, __m256i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm256_dpwssd_epi32",
        "full_name": "__m256i _mm256_dpwssd_epi32(__m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_dpbusd_epi32",
        "full_name": "__m256i _mm256_dpbusd_epi32(__m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_clmulepi64_epi128",
        "full_name": "__m256i _mm256_clmulepi64_epi128(__m256i b, __m256i c, const int Imm8);",
        "description": "Carry-less multiplication of one quadword of \"b\" by one quadword of \"c\", stores the 128-bit result in \"dst\". The immediate \"Imm8\" is used to determine which quadwords of \"b\" and \"c\" should be used."
    },
    {
        "name": "_mm256_bitshuffle_epi64_mask",
        "full_name": "__mmask32 _mm256_bitshuffle_epi64_mask(__m256i b, __m256i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\"."
    },
    {
        "name": "_mm256_2intersect_epi64",
        "full_name": "void _mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *k1, __mmask8 *k2);",
        "description": "Compute intersection of packed 64-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm256_2intersect_epi32",
        "full_name": "void _mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *k1, __mmask8 *k2);",
        "description": "Compute intersection of packed 32-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm_sha256msg2_epu32",
        "full_name": "__m128i _mm_sha256msg2_epu32(__m128i a, __m128i b);",
        "description": "Perform the final calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_sha256msg1_epu32",
        "full_name": "__m128i _mm_sha256msg1_epu32(__m128i a, __m128i b);",
        "description": "Perform an intermediate calculation for the next four SHA256 message values (unsigned 32-bit integers) using previous message values from \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_popcnt_epi8",
        "full_name": "__m128i _mm_popcnt_epi8(__m128i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_popcnt_epi64",
        "full_name": "__m128i _mm_popcnt_epi64(__m128i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_popcnt_epi32",
        "full_name": "__m128i _mm_popcnt_epi32(__m128i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_popcnt_epi16",
        "full_name": "__m128i _mm_popcnt_epi16(__m128i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_maskz_popcnt_epi8",
        "full_name": "__m128i _mm_maskz_popcnt_epi8(__mmask16 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_popcnt_epi64",
        "full_name": "__m128i _mm_maskz_popcnt_epi64(__mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_popcnt_epi32",
        "full_name": "__m128i _mm_maskz_popcnt_epi32(__mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_popcnt_epi16",
        "full_name": "__m128i _mm_maskz_popcnt_epi16(__mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_gf2p8affineinv_epi64_epi8",
        "full_name": "__m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 k, __m128i x, __m128i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_gf2p8affine_epi64_epi8",
        "full_name": "__m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 k, __m128i x, __m128i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_epi8",
        "full_name": "__m128i _mm_maskz_expandloadu_epi8(__mmask16 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expandloadu_epi16",
        "full_name": "__m128i _mm_maskz_expandloadu_epi16(__mmask8 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_dpwssd_epi32",
        "full_name": "__m128i _mm_maskz_dpwssd_epi32(__mmask8 k, __m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_popcnt_epi8",
        "full_name": "__m128i _mm_mask_popcnt_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 8-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_popcnt_epi64",
        "full_name": "__m128i _mm_mask_popcnt_epi64(__m128i src, __mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 64-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_popcnt_epi32",
        "full_name": "__m128i _mm_mask_popcnt_epi32(__m128i src, __mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 32-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_popcnt_epi16",
        "full_name": "__m128i _mm_mask_popcnt_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Count the number of logical 1 bits in packed 16-bit integers in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_gf2p8affineinv_epi64_epi8",
        "full_name": "__m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i src, __mmask16 k, __m128i x, __m128i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_gf2p8affine_epi64_epi8",
        "full_name": "__m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i src, __mmask16 k, __m128i x, __m128i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expandloadu_epi8",
        "full_name": "__m128i _mm_mask_expandloadu_epi8(__m128i src, __mmask16 k, const void *mem_addr);",
        "description": "Load contiguous active 8-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expandloadu_epi16",
        "full_name": "__m128i _mm_mask_expandloadu_epi16(__m128i src, __mmask8 k, const void *mem_addr);",
        "description": "Load contiguous active 16-bit integers from unaligned memory at \"mem_addr\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_dpwssd_epi32",
        "full_name": "__m128i _mm_mask_dpwssd_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_dpbusd_epi32",
        "full_name": "__m128i _mm_mask_dpbusd_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_compressstoreu_epi8",
        "full_name": "void _mm_mask_compressstoreu_epi8(void *base_addr, __mmask16 k, __m128i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_compressstoreu_epi16",
        "full_name": "void _mm_mask_compressstoreu_epi16(void *base_addr, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to unaligned memory at \"base_addr\"."
    },
    {
        "name": "_mm_mask_bitshuffle_epi64_mask",
        "full_name": "__mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 k, __m128i b, __m128i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_gf2p8affineinv_epi64_epi8",
        "full_name": "__m128i _mm_gf2p8affineinv_epi64_epi8(__m128i x, __m128i a, int b);",
        "description": "Compute an inverse affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. The inverse of the 8-bit values in \"x\" is defined with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm_gf2p8affine_epi64_epi8",
        "full_name": "__m128i _mm_gf2p8affine_epi64_epi8(__m128i x, __m128i a, int b);",
        "description": "Compute an affine transformation in the Galois Field 2^8. An affine transformation is defined by \"A\" * \"x\" + \"b\", where \"A\" represents an 8 by 8 bit matrix, \"x\" represents an 8-bit vector, and \"b\" is a constant immediate byte. Store the packed 8-bit results in \"dst\"."
    },
    {
        "name": "_mm_dpwssd_epi32",
        "full_name": "__m128i _mm_dpwssd_epi32(__m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_dpbusd_epi32",
        "full_name": "__m128i _mm_dpbusd_epi32(__m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\", and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_clwb",
        "full_name": "void _mm_clwb(void const * p);",
        "description": "Write back to memory the cache line that contains \"p\" from any level of the cache hierarchy in the cache coherence domain."
    },
    {
        "name": "_mm_clflushopt",
        "full_name": "void _mm_clflushopt(void const * p);",
        "description": "Invalidate and flush the cache line that contains \"p\" from all levels of the cache hierarchy."
    },
    {
        "name": "_mm_clflush",
        "full_name": "void _mm_clflush(void const * p);",
        "description": "Invalidate and flush the cache line that contains \"p\" from all levels of the cache hierarchy."
    },
    {
        "name": "_mm_bitshuffle_epi64_mask",
        "full_name": "__mmask16 _mm_bitshuffle_epi64_mask(__m128i b, __m128i c);",
        "description": "Gather 64 bits from \"b\" using selection bits in \"c\". For each 64-bit element in \"b\", gather 8 bits from the 64-bit element in \"b\" at 8 bit position controlled by the 8 corresponding 8-bit elements of \"c\", and store the result in the corresponding 8-bit element of \"dst\"."
    },
    {
        "name": "_mm_2intersect_epi64",
        "full_name": "void _mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *k1, __mmask8 *k2);",
        "description": "Compute intersection of packed 64-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm_2intersect_epi32",
        "full_name": "void _mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *k1, __mmask8 *k2);",
        "description": "Compute intersection of packed 32-bit integer vectors \"a\" and \"b\", and store indication of match in the corresponding bit of two mask registers specified by \"k1\" and \"k2\". A match in corresponding elements of \"a\" and \"b\" is indicated by a set bit in the corresponding bit of the mask registers."
    },
    {
        "name": "_mm_permutex2var_epi8",
        "full_name": "__m128i _mm_permutex2var_epi8(__m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutex2var_epi8",
        "full_name": "__m128i _mm_mask_permutex2var_epi8(__m128i a, __mmask16 k, __m128i idx, __m128i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutex2var_epi8",
        "full_name": "__m128i _mm_maskz_permutex2var_epi8(__mmask16 k, __m128i a, __m128i idx, __m128i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask2_permutex2var_epi8",
        "full_name": "__m128i _mm_mask2_permutex2var_epi8(__m128i a, __m128i idx, __mmask16 k, __m128i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutex2var_epi8",
        "full_name": "__m256i _mm256_permutex2var_epi8(__m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutex2var_epi8",
        "full_name": "__m256i _mm256_mask_permutex2var_epi8(__m256i a, __mmask32 k, __m256i idx, __m256i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutex2var_epi8",
        "full_name": "__m256i _mm256_maskz_permutex2var_epi8(__mmask32 k, __m256i a, __m256i idx, __m256i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask2_permutex2var_epi8",
        "full_name": "__m256i _mm256_mask2_permutex2var_epi8(__m256i a, __m256i idx, __mmask32 k, __m256i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutex2var_epi8",
        "full_name": "__m512i _mm512_permutex2var_epi8(__m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutex2var_epi8",
        "full_name": "__m512i _mm512_mask_permutex2var_epi8(__m512i a, __mmask64 k, __m512i idx, __m512i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutex2var_epi8",
        "full_name": "__m512i _mm512_maskz_permutex2var_epi8(__mmask64 k, __m512i a, __m512i idx, __m512i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask2_permutex2var_epi8",
        "full_name": "__m512i _mm512_mask2_permutex2var_epi8(__m512i a, __m512i idx, __mmask64 k, __m512i b);",
        "description": "Shuffle 8-bit integers in \"a\" and \"b\" across lanes using the corresponding selector and index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"idx\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_permutexvar_epi8",
        "full_name": "__m128i _mm_permutexvar_epi8(__m128i idx, __m128i a);",
        "description": "Shuffle 8-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_permutexvar_epi8",
        "full_name": "__m128i _mm_mask_permutexvar_epi8(__m128i src, __mmask16 k, __m128i idx, __m128i a);",
        "description": "Shuffle 8-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_permutexvar_epi8",
        "full_name": "__m128i _mm_maskz_permutexvar_epi8(__mmask16 k, __m128i idx, __m128i a);",
        "description": "Shuffle 8-bit integers in \"a\" using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_permutexvar_epi8",
        "full_name": "__m256i _mm256_permutexvar_epi8(__m256i idx, __m256i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_permutexvar_epi8",
        "full_name": "__m256i _mm256_mask_permutexvar_epi8(__m256i src, __mmask32 k, __m256i idx, __m256i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_permutexvar_epi8",
        "full_name": "__m256i _mm256_maskz_permutexvar_epi8(__mmask32 k, __m256i idx, __m256i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_permutexvar_epi8",
        "full_name": "__m512i _mm512_permutexvar_epi8(__m512i idx, __m512i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_permutexvar_epi8",
        "full_name": "__m512i _mm512_mask_permutexvar_epi8(__m512i src, __mmask64 k, __m512i idx, __m512i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_permutexvar_epi8",
        "full_name": "__m512i _mm512_maskz_permutexvar_epi8(__mmask64 k, __m512i idx, __m512i a);",
        "description": "Shuffle 8-bit integers in \"a\" across lanes using the corresponding index in \"idx\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_madd52hi_epu64",
        "full_name": "__m128i _mm_madd52hi_epu64(__m128i a, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_madd52hi_epu64",
        "full_name": "__m128i _mm_mask_madd52hi_epu64(__m128i a, __mmask8 k, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_madd52hi_epu64",
        "full_name": "__m128i _mm_maskz_madd52hi_epu64(__mmask8 k, __m128i a, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_madd52hi_epu64",
        "full_name": "__m256i _mm256_madd52hi_epu64(__m256i a, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_madd52hi_epu64",
        "full_name": "__m256i _mm256_mask_madd52hi_epu64(__m256i a, __mmask8 k, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_madd52hi_epu64",
        "full_name": "__m256i _mm256_maskz_madd52hi_epu64(__mmask8 k, __m256i a, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_madd52hi_epu64",
        "full_name": "__m512i _mm512_madd52hi_epu64(__m512i a, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_madd52hi_epu64",
        "full_name": "__m512i _mm512_mask_madd52hi_epu64(__m512i a, __mmask8 k, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_madd52hi_epu64",
        "full_name": "__m512i _mm512_maskz_madd52hi_epu64(__mmask8 k, __m512i a, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the high 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_madd52lo_epu64",
        "full_name": "__m128i _mm_madd52lo_epu64(__m128i a, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_mask_madd52lo_epu64",
        "full_name": "__m128i _mm_mask_madd52lo_epu64(__m128i a, __mmask8 k, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_madd52lo_epu64",
        "full_name": "__m128i _mm_maskz_madd52lo_epu64(__mmask8 k, __m128i a, __m128i b, __m128i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_madd52lo_epu64",
        "full_name": "__m256i _mm256_madd52lo_epu64(__m256i a, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_mask_madd52lo_epu64",
        "full_name": "__m256i _mm256_mask_madd52lo_epu64(__m256i a, __mmask8 k, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_madd52lo_epu64",
        "full_name": "__m256i _mm256_maskz_madd52lo_epu64(__mmask8 k, __m256i a, __m256i b, __m256i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_madd52lo_epu64",
        "full_name": "__m512i _mm512_madd52lo_epu64(__m512i a, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_mask_madd52lo_epu64",
        "full_name": "__m512i _mm512_mask_madd52lo_epu64(__m512i a, __mmask8 k, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using writemask \"k\" (elements are copied from \"a\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_madd52lo_epu64",
        "full_name": "__m512i _mm512_maskz_madd52lo_epu64(__mmask8 k, __m512i a, __m512i b, __m512i c);",
        "description": "Multiply packed unsigned 52-bit integers in each 64-bit element of \"b\" and \"c\" to form a 104-bit intermediate result. Add the low 52-bit unsigned integer from the intermediate result with the corresponding unsigned 64-bit integer in \"a\", and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_aesdeclast_epi128",
        "full_name": "__m256i _mm256_aesdeclast_epi128(__m256i a, __m256i RoundKey);",
        "description": "Perform the last round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_aesdeclast_epi128",
        "full_name": "__m512i _mm512_aesdeclast_epi128(__m512i a, __m512i RoundKey);",
        "description": "Perform the last round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_aesdec_epi128",
        "full_name": "__m256i _mm256_aesdec_epi128(__m256i a, __m256i RoundKey);",
        "description": "Perform one round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_aesdec_epi128",
        "full_name": "__m512i _mm512_aesdec_epi128(__m512i a, __m512i RoundKey);",
        "description": "Perform one round of an AES decryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_aesenc_epi128",
        "full_name": "__m256i _mm256_aesenc_epi128(__m256i a, __m256i RoundKey);",
        "description": "Perform one round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_aesenc_epi128",
        "full_name": "__m512i _mm512_aesenc_epi128(__m512i a, __m512i RoundKey);",
        "description": "Perform one round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm256_aesenclast_epi128",
        "full_name": "__m256i _mm256_aesenclast_epi128(__m256i a, __m256i RoundKey);",
        "description": "Perform the last round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm512_aesenclast_epi128",
        "full_name": "__m512i _mm512_aesenclast_epi128(__m512i a, __m512i RoundKey);",
        "description": "Perform the last round of an AES encryption flow on data (state) in \"a\" using the round key in \"RoundKey\", and store the results in \"dst\"."
    },
    {
        "name": "_mm_dpwssds_epi32",
        "full_name": "__m128i _mm_dpwssds_epi32(__m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_mask_dpwssds_epi32",
        "full_name": "__m128i _mm_mask_dpwssds_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_dpwssds_epi32",
        "full_name": "__m128i _mm_maskz_dpwssds_epi32(__mmask8 k, __m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_dpwssds_epi32",
        "full_name": "__m256i _mm256_dpwssds_epi32(__m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_mask_dpwssds_epi32",
        "full_name": "__m256i _mm256_mask_dpwssds_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_dpwssds_epi32",
        "full_name": "__m256i _mm256_maskz_dpwssds_epi32(__mmask8 k, __m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_dpwssds_epi32",
        "full_name": "__m512i _mm512_dpwssds_epi32(__m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm512_mask_dpwssds_epi32",
        "full_name": "__m512i _mm512_mask_dpwssds_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_dpwssds_epi32",
        "full_name": "__m512i _mm512_maskz_dpwssds_epi32(__mmask16 k, __m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 2 adjacent pairs of signed 16-bit integers in \"a\" with corresponding 16-bit integers in \"b\", producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_dpbusds_epi32",
        "full_name": "__m128i _mm_dpbusds_epi32(__m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm_mask_dpbusds_epi32",
        "full_name": "__m128i _mm_mask_dpbusds_epi32(__m128i src, __mmask8 k, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_dpbusds_epi32",
        "full_name": "__m128i _mm_maskz_dpbusds_epi32(__mmask8 k, __m128i src, __m128i a, __m128i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_dpbusds_epi32",
        "full_name": "__m256i _mm256_dpbusds_epi32(__m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm256_mask_dpbusds_epi32",
        "full_name": "__m256i _mm256_mask_dpbusds_epi32(__m256i src, __mmask8 k, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_dpbusds_epi32",
        "full_name": "__m256i _mm256_maskz_dpbusds_epi32(__mmask8 k, __m256i src, __m256i a, __m256i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_dpbusds_epi32",
        "full_name": "__m512i _mm512_dpbusds_epi32(__m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\"."
    },
    {
        "name": "_mm512_mask_dpbusds_epi32",
        "full_name": "__m512i _mm512_mask_dpbusds_epi32(__m512i src, __mmask16 k, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_dpbusds_epi32",
        "full_name": "__m512i _mm512_maskz_dpbusds_epi32(__mmask16 k, __m512i src, __m512i a, __m512i b);",
        "description": "Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \"a\" with corresponding signed 8-bit integers in \"b\", producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in \"src\" using signed saturation, and store the packed 32-bit results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sha1msg1_epu32",
        "full_name": "__m128i _mm_sha1msg1_epu32(__m128i a, __m128i b);",
        "description": "Perform an intermediate calculation for the next four SHA1 message values (unsigned 32-bit integers) using previous message values from \"a\" and \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_mm_sha1msg2_epu32",
        "full_name": "__m128i _mm_sha1msg2_epu32(__m128i a, __m128i b);",
        "description": "Perform the final calculation for the next four SHA1 message values (unsigned 32-bit integers) using the intermediate result in \"a\" and the previous message values in \"b\", and store the result in \"dst\"."
    },
    {
        "name": "_rotl64",
        "full_name": "unsigned __int64 _rotl64(unsigned __int64 a, const int shift);",
        "description": "Shift the bits of unsigned 64-bit integer \"a\" left by the number of bits specified in \"shift\", rotating the most-significant bit to the least-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_rotr64",
        "full_name": "unsigned __int64 _rotr64(unsigned __int64 a, const int shift);",
        "description": "Shift the bits of unsigned 64-bit integer \"a\" right by the number of bits specified in \"shift\", rotating the least-significant bit to the most-significant bit location, and store the unsigned result in \"dst\"."
    },
    {
        "name": "_directstoreu_u32",
        "full_name": "void _directstoreu_u32(void *dst, unsigned int val);",
        "description": "Store 32-bit integer from \"val\" into memory using direct store."
    },
    {
        "name": "_directstoreu_u64",
        "full_name": "void _directstoreu_u64(void *dst, unsigned __int64 val);",
        "description": "Store 64-bit integer from \"val\" into memory using direct store."
    },
    {
        "name": "_mm_mask_compress_epi8",
        "full_name": "__m128i _mm_mask_compress_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_epi8",
        "full_name": "__m128i _mm_maskz_compress_epi8(__mmask16 k, __m128i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_compress_epi16",
        "full_name": "__m128i _mm_mask_compress_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm_maskz_compress_epi16",
        "full_name": "__m128i _mm_maskz_compress_epi16(__mmask8 k, __m128i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_epi8",
        "full_name": "__m256i _mm256_mask_compress_epi8(__m256i src, __mmask32 k, __m256i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_epi8",
        "full_name": "__m256i _mm256_maskz_compress_epi8(__mmask32 k, __m256i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm256_mask_compress_epi16",
        "full_name": "__m256i _mm256_mask_compress_epi16(__m256i src, __mmask16 k, __m256i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm256_maskz_compress_epi16",
        "full_name": "__m256i _mm256_maskz_compress_epi16(__mmask16 k, __m256i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_epi8",
        "full_name": "__m512i _mm512_mask_compress_epi8(__m512i src, __mmask64 k, __m512i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_epi8",
        "full_name": "__m512i _mm512_maskz_compress_epi8(__mmask64 k, __m512i a);",
        "description": "Contiguously store the active 8-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm512_mask_compress_epi16",
        "full_name": "__m512i _mm512_mask_compress_epi16(__m512i src, __mmask32 k, __m512i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in writemask \"k\") to \"dst\", and pass through the remaining elements from \"src\"."
    },
    {
        "name": "_mm512_maskz_compress_epi16",
        "full_name": "__m512i _mm512_maskz_compress_epi16(__mmask32 k, __m512i a);",
        "description": "Contiguously store the active 16-bit integers in \"a\" (those with their respective bit set in zeromask \"k\") to \"dst\", and set the remaining elements to zero."
    },
    {
        "name": "_mm_mask_expand_epi8",
        "full_name": "__m128i _mm_mask_expand_epi8(__m128i src, __mmask16 k, __m128i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_epi8",
        "full_name": "__m128i _mm_maskz_expand_epi8(__mmask16 k, __m128i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_mask_expand_epi16",
        "full_name": "__m128i _mm_mask_expand_epi16(__m128i src, __mmask8 k, __m128i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_maskz_expand_epi16",
        "full_name": "__m128i _mm_maskz_expand_epi16(__mmask8 k, __m128i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_epi8",
        "full_name": "__m256i _mm256_mask_expand_epi8(__m256i src, __mmask32 k, __m256i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_epi8",
        "full_name": "__m256i _mm256_maskz_expand_epi8(__mmask32 k, __m256i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_expand_epi16",
        "full_name": "__m256i _mm256_mask_expand_epi16(__m256i src, __mmask16 k, __m256i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_expand_epi16",
        "full_name": "__m256i _mm256_maskz_expand_epi16(__mmask16 k, __m256i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_epi8",
        "full_name": "__m512i _mm512_mask_expand_epi8(__m512i src, __mmask64 k, __m512i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_epi8",
        "full_name": "__m512i _mm512_maskz_expand_epi8(__mmask64 k, __m512i a);",
        "description": "Load contiguous active 8-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_expand_epi16",
        "full_name": "__m512i _mm512_mask_expand_epi16(__m512i src, __mmask32 k, __m512i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_expand_epi16",
        "full_name": "__m512i _mm512_maskz_expand_epi16(__mmask32 k, __m512i a);",
        "description": "Load contiguous active 16-bit integers from \"a\" (those with their respective bit set in mask \"k\"), and store the results in \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_multishift_epi64_epi8",
        "full_name": "__m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\"."
    },
    {
        "name": "_mm_mask_multishift_epi64_epi8",
        "full_name": "__m128i _mm_mask_multishift_epi64_epi8(__m128i src, __mmask16 k, __m128i a, __m128i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_multishift_epi64_epi8",
        "full_name": "__m256i _mm256_multishift_epi64_epi8(__m256i a, __m256i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\"."
    },
    {
        "name": "_mm512_multishift_epi64_epi8",
        "full_name": "__m512i _mm512_multishift_epi64_epi8(__m512i a, __m512i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\"."
    },
    {
        "name": "_mm_maskz_multishift_epi64_epi8",
        "full_name": "__m128i _mm_maskz_multishift_epi64_epi8(__mmask16 k, __m128i a, __m128i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_mask_multishift_epi64_epi8",
        "full_name": "__m256i _mm256_mask_multishift_epi64_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm256_maskz_multishift_epi64_epi8",
        "full_name": "__m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 k, __m256i a, __m256i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_mask_multishift_epi64_epi8",
        "full_name": "__m512i _mm512_mask_multishift_epi64_epi8(__m512i src, __mmask64 k, __m512i a, __m512i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using writemask \"k\" (elements are copied from \"src\" when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm512_maskz_multishift_epi64_epi8",
        "full_name": "__m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 k, __m512i a, __m512i b);",
        "description": "For each 64-bit element in \"b\", select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of \"a\", and store the 8 assembled bytes to the corresponding 64-bit element of \"dst\" using zeromask \"k\" (elements are zeroed out when the corresponding mask bit is not set)."
    },
    {
        "name": "_mm_sha256rnds2_epu32",
        "full_name": "__m128i _mm_sha256rnds2_epu32(__m128i a, __m128i b, __m128i k);",
        "description": "Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from \"a\", an initial SHA256 state (A,B,E,F) from \"b\", and a pre-computed sum of the next 2 round message values (unsigned 32-bit integers) and the corresponding round constants from \"k\", and store the updated SHA256 state (A,B,E,F) in \"dst\"."
    },
    {
        "name": "_mm_sha1nexte_epu32",
        "full_name": "__m128i _mm_sha1nexte_epu32(__m128i a, __m128i b);",
        "description": "Calculate SHA1 state variable E after four rounds of operation from the current SHA1 state variable \"a\", add that value to the scheduled values (unsigned 32-bit integers) in \"b\", and store the result in \"dst\"."
    }
  ]
}