; /* sp_x86_64_asm.asm */ ; /* ; * Copyright (C) 2006-2024 wolfSSL Inc. ; * ; * This file is part of wolfSSL. ; * ; * wolfSSL is free software; you can redistribute it and/or modify ; * it under the terms of the GNU General Public License as published by ; * the Free Software Foundation; either version 2 of the License, or ; * (at your option) any later version. ; * ; * wolfSSL is distributed in the hope that it will be useful, ; * but WITHOUT ANY WARRANTY; without even the implied warranty of ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ; * GNU General Public License for more details. ; * ; * You should have received a copy of the GNU General Public License ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT NO_AVX2_SUPPORT = 1 ENDIF ; MOVBE instruction not recognized by old versions of MASM IFNDEF NO_MOVBE_SUPPORT NO_MOVBE_SUPPORT = 1 ENDIF ENDIF IFNDEF HAVE_INTEL_AVX1 HAVE_INTEL_AVX1 = 1 ENDIF IFNDEF NO_AVX2_SUPPORT HAVE_INTEL_AVX2 = 1 ENDIF IFNDEF _WIN64 _WIN64 = 1 ENDIF IFNDEF WOLFSSL_SP_NO_2048 IFNDEF WOLFSSL_SP_NO_2048 ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. ; * ; * r A single precision integer. ; * size Maximum number of bytes to convert ; * a Byte array. ; * n Number of bytes in array to read. 
; */
; Register roles (Microsoft x64 argument registers, as used below):
;   rcx       r: output cursor, advanced by 8 for every word stored
;   rdx       size on entry; it is never consulted. Reused here as the
;             end-of-output pointer (r + 256), so exactly 256 bytes of r
;             are always written.
;   r8        a: start of the input byte array
;   r9        n: input bytes still to be converted
;   r11       one past the last unconverted input byte (walks down from a + n)
;   rax, r10  scratch
; Only volatile registers are written and rsp is never adjusted, so this is
; a leaf function and needs no prolog or unwind data.
; NOTE(review): nothing in this routine limits n to 256 - presumably the
; callers guarantee n <= 256; confirm.
_text SEGMENT READONLY PARA
sp_2048_from_bin_bswap PROC
        mov r11, r8
        mov rdx, rcx
        add r11, r9
        add rdx, 256
        jmp L_2048_from_bin_bswap_64_end
        ; Convert the last 64 unconverted input bytes into 8 output words.
L_2048_from_bin_bswap_64_start:
        sub r11, 64
        mov rax, QWORD PTR [r11+56]
        mov r10, QWORD PTR [r11+48]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov rax, QWORD PTR [r11+40]
        mov r10, QWORD PTR [r11+32]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        mov rax, QWORD PTR [r11+24]
        mov r10, QWORD PTR [r11+16]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov rax, QWORD PTR [r11+8]
        mov r10, QWORD PTR [r11]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
L_2048_from_bin_bswap_64_end:
        cmp r9, 63
        jg L_2048_from_bin_bswap_64_start
        jmp L_2048_from_bin_bswap_8_end
        ; Convert the last 8 unconverted input bytes into one output word.
L_2048_from_bin_bswap_8_start:
        sub r11, 8
        mov rax, QWORD PTR [r11]
        bswap rax
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
L_2048_from_bin_bswap_8_end:
        cmp r9, 7
        jg L_2048_from_bin_bswap_8_start
        ; Fewer than 8 bytes are left. r8 still points at a[0], so the
        ; leftover bytes are the most significant ones: fold them, first
        ; byte highest, into r10 and store that as one more word.
        test r9, r9
        je L_2048_from_bin_bswap_hi_end
        xor r10d, r10d
        xor eax, eax
L_2048_from_bin_bswap_hi_start:
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_2048_from_bin_bswap_hi_start
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_2048_from_bin_bswap_hi_end:
        ; Zero every remaining output word up to r + 256.
        cmp rcx, rdx
        jge L_2048_from_bin_bswap_zero_end
L_2048_from_bin_bswap_zero_start:
        mov QWORD PTR [rcx], 0
        add rcx, 8
        cmp rcx, rdx
        jl L_2048_from_bin_bswap_zero_start
L_2048_from_bin_bswap_zero_end:
        ret
sp_2048_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
; */
; Register roles (Microsoft x64 argument registers, as used below):
;   rcx       r: output cursor, advanced by 8 for every word stored
;   rdx       size on entry; it is never consulted. Reused here as the
;             end-of-output pointer (r + 256), so exactly 256 bytes of r
;             are always written.
;   r8        a: start of the input byte array
;   r9        n: input bytes still to be converted
;   r11       one past the last unconverted input byte (walks down from a + n)
;   rax, r10  scratch
; Only volatile registers are written and rsp is never adjusted, so this is
; a leaf function and needs no prolog or unwind data.
; NOTE(review): nothing in this routine limits n to 256 - presumably the
; callers guarantee n <= 256; confirm.
_text SEGMENT READONLY PARA
sp_2048_from_bin_movbe PROC
        mov r11, r8
        mov rdx, rcx
        add r11, r9
        add rdx, 256
        jmp L_2048_from_bin_movbe_64_end
        ; Convert the last 64 unconverted input bytes into 8 output words;
        ; movbe performs the byte reversal as part of the load.
L_2048_from_bin_movbe_64_start:
        sub r11, 64
        movbe rax, QWORD PTR [r11+56]
        movbe r10, QWORD PTR [r11+48]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        movbe rax, QWORD PTR [r11+40]
        movbe r10, QWORD PTR [r11+32]
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        movbe rax, QWORD PTR [r11+24]
        movbe r10, QWORD PTR [r11+16]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        movbe rax, QWORD PTR [r11+8]
        movbe r10, QWORD PTR [r11]
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
L_2048_from_bin_movbe_64_end:
        cmp r9, 63
        jg L_2048_from_bin_movbe_64_start
        jmp L_2048_from_bin_movbe_8_end
        ; Convert the last 8 unconverted input bytes into one output word.
L_2048_from_bin_movbe_8_start:
        sub r11, 8
        movbe rax, QWORD PTR [r11]
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
L_2048_from_bin_movbe_8_end:
        cmp r9, 7
        jg L_2048_from_bin_movbe_8_start
        ; Fewer than 8 bytes are left. r8 still points at a[0], so the
        ; leftover bytes are the most significant ones: fold them, first
        ; byte highest, into r10 and store that as one more word.
        test r9, r9
        je L_2048_from_bin_movbe_hi_end
        xor r10d, r10d
        xor eax, eax
L_2048_from_bin_movbe_hi_start:
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_2048_from_bin_movbe_hi_start
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_2048_from_bin_movbe_hi_end:
        ; Zero every remaining output word up to r + 256.
        cmp rcx, rdx
        jge L_2048_from_bin_movbe_zero_end
L_2048_from_bin_movbe_zero_start:
        mov QWORD PTR [rcx], 0
        add rcx, 8
        cmp rcx, rdx
        jl L_2048_from_bin_movbe_zero_start
L_2048_from_bin_movbe_zero_end:
        ret
sp_2048_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 256
;  * Uses the bswap instruction.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
; */
; Register use, as read from the code below:
;   rcx      r: source of 32 64-bit words, read from offset 248 down to 0
;   rdx      a: destination bytes, written from offset 0 up to 248
;   rax, r8  scratch (two words are converted per step)
; Each word is byte-reversed with bswap and stored at the mirrored position,
; so the word at the highest offset of r supplies the first 8 bytes of a.
; Straight-line code: no stack use, no branches.
_text SEGMENT READONLY PARA
sp_2048_to_bin_bswap_32 PROC
        ; r[248], r[240] -> a[0..15]
        mov rax, QWORD PTR [rcx+248]
        mov r8, QWORD PTR [rcx+240]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx], rax
        mov QWORD PTR [rdx+8], r8
        mov rax, QWORD PTR [rcx+232]
        mov r8, QWORD PTR [rcx+224]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+16], rax
        mov QWORD PTR [rdx+24], r8
        mov rax, QWORD PTR [rcx+216]
        mov r8, QWORD PTR [rcx+208]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+32], rax
        mov QWORD PTR [rdx+40], r8
        mov rax, QWORD PTR [rcx+200]
        mov r8, QWORD PTR [rcx+192]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+48], rax
        mov QWORD PTR [rdx+56], r8
        mov rax, QWORD PTR [rcx+184]
        mov r8, QWORD PTR [rcx+176]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+64], rax
        mov QWORD PTR [rdx+72], r8
        mov rax, QWORD PTR [rcx+168]
        mov r8, QWORD PTR [rcx+160]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+80], rax
        mov QWORD PTR [rdx+88], r8
        mov rax, QWORD PTR [rcx+152]
        mov r8, QWORD PTR [rcx+144]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+96], rax
        mov QWORD PTR [rdx+104], r8
        mov rax, QWORD PTR [rcx+136]
        mov r8, QWORD PTR [rcx+128]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+112], rax
        mov QWORD PTR [rdx+120], r8
        ; Second half: r[120]..r[0] -> a[128..255]
        mov rax, QWORD PTR [rcx+120]
        mov r8, QWORD PTR [rcx+112]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+128], rax
        mov QWORD PTR [rdx+136], r8
        mov rax, QWORD PTR [rcx+104]
        mov r8, QWORD PTR [rcx+96]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+144], rax
        mov QWORD PTR [rdx+152], r8
        mov rax, QWORD PTR [rcx+88]
        mov r8, QWORD PTR [rcx+80]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+160], rax
        mov QWORD PTR [rdx+168], r8
        mov rax, QWORD PTR [rcx+72]
        mov r8, QWORD PTR [rcx+64]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+176], rax
        mov QWORD PTR [rdx+184], r8
        mov rax, QWORD PTR [rcx+56]
        mov r8, QWORD PTR [rcx+48]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+192], rax
        mov QWORD PTR [rdx+200], r8
        mov rax, QWORD PTR [rcx+40]
        mov r8, QWORD PTR [rcx+32]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+208], rax
        mov QWORD PTR [rdx+216], r8
        mov rax, QWORD PTR [rcx+24]
        mov r8, QWORD PTR [rcx+16]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+224], rax
        mov QWORD PTR [rdx+232], r8
        mov rax, QWORD PTR [rcx+8]
        mov r8, QWORD PTR [rcx]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+240], rax
        mov QWORD PTR [rdx+248], r8
        ret
sp_2048_to_bin_bswap_32 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 256
;  * Uses the movbe instruction which is optional.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
; Register use, as read from the code below:
;   rcx      r: source of 32 64-bit words, read from offset 248 down to 0
;   rdx      a: destination bytes, written from offset 0 up to 248
;   rax, r8  scratch (two words are converted per step)
; Same data movement as the bswap variant; here movbe reverses the bytes as
; part of each load, so the stores are plain mov instructions.
_text SEGMENT READONLY PARA
sp_2048_to_bin_movbe_32 PROC
        ; r[248], r[240] -> a[0..15]
        movbe rax, QWORD PTR [rcx+248]
        movbe r8, QWORD PTR [rcx+240]
        mov QWORD PTR [rdx], rax
        mov QWORD PTR [rdx+8], r8
        movbe rax, QWORD PTR [rcx+232]
        movbe r8, QWORD PTR [rcx+224]
        mov QWORD PTR [rdx+16], rax
        mov QWORD PTR [rdx+24], r8
        movbe rax, QWORD PTR [rcx+216]
        movbe r8, QWORD PTR [rcx+208]
        mov QWORD PTR [rdx+32], rax
        mov QWORD PTR [rdx+40], r8
        movbe rax, QWORD PTR [rcx+200]
        movbe r8, QWORD PTR [rcx+192]
        mov QWORD PTR [rdx+48], rax
        mov QWORD PTR [rdx+56], r8
        movbe rax, QWORD PTR [rcx+184]
        movbe r8, QWORD PTR [rcx+176]
        mov QWORD PTR [rdx+64], rax
        mov QWORD PTR [rdx+72], r8
        movbe rax, QWORD PTR [rcx+168]
        movbe r8, QWORD PTR [rcx+160]
        mov QWORD PTR [rdx+80], rax
        mov QWORD PTR [rdx+88], r8
        movbe rax, QWORD PTR [rcx+152]
        movbe r8, QWORD PTR [rcx+144]
        mov QWORD PTR [rdx+96], rax
        mov QWORD PTR [rdx+104], r8
        movbe rax, QWORD PTR [rcx+136]
        movbe r8, QWORD PTR [rcx+128]
        mov QWORD PTR [rdx+112], rax
        mov QWORD PTR [rdx+120], r8
        ; Second half: r[120]..r[0] -> a[128..255]
        movbe rax, QWORD PTR [rcx+120]
        movbe r8, QWORD PTR [rcx+112]
        mov QWORD PTR [rdx+128], rax
        mov QWORD PTR [rdx+136], r8
        movbe rax, QWORD PTR [rcx+104]
        movbe r8, QWORD PTR [rcx+96]
        mov QWORD PTR [rdx+144], rax
        mov QWORD PTR [rdx+152], r8
        movbe rax, QWORD PTR [rcx+88]
        movbe r8, QWORD PTR [rcx+80]
        mov QWORD PTR [rdx+160], rax
        mov QWORD PTR [rdx+168], r8
        movbe rax, QWORD PTR [rcx+72]
        movbe r8, QWORD PTR [rcx+64]
        mov QWORD PTR [rdx+176], rax
        mov QWORD PTR [rdx+184], r8
        movbe rax, QWORD PTR [rcx+56]
        movbe r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rdx+192], rax
        mov QWORD PTR [rdx+200], r8
        movbe rax, QWORD PTR [rcx+40]
        movbe r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rdx+208], rax
        mov QWORD PTR [rdx+216], r8
        movbe rax, QWORD PTR [rcx+24]
        movbe r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rdx+224], rax
        mov QWORD PTR [rdx+232], r8
        movbe rax, QWORD PTR [rcx+8]
        movbe r8, QWORD PTR [rcx]
        mov QWORD PTR [rdx+240], rax
        mov QWORD PTR [rdx+248], r8
        ret
sp_2048_to_bin_movbe_32 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_16 PROC
        push r12
        ; Every mul overwrites rdx:rax, so the second argument is moved to r9.
        mov r9, rdx
        ; 128 bytes of stack scratch; the result words computed below are
        ; stored there.
        sub rsp, 128
        ; In the "A[i] * B[j]" labels, A is addressed through r9 and B
        ; through r8. r10/r11/r12 rotate as a three-word column accumulator.
        ; A[0] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9]
        xor r12, r12
        mov QWORD PTR [rsp], rax
        mov r11, rdx
        ; A[0] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[1] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+8]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rsp+24], r10
        ; A[0] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[2] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR
[r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+32], r11 ; A[0] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+40], r12 ; A[0] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+48], r10 ; A[0] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR 
[r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+56], r11 ; A[0] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+64], r12 ; A[0] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx 
adc r12, 0 ; A[6] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+72], r10 ; A[0] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+80], r11 ; A[0] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[7] mov 
rax, QWORD PTR [r8+56] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+88], r12 ; A[0] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+88] add r10, rax 
adc r11, rdx adc r12, 0 ; A[12] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+96], r10 ; A[0] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+104], r11 ; A[0] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx 
adc r11, 0 ; A[4] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+112], r12 ; A[0] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[7] mov rax, QWORD PTR 
[r8+56] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+120], r10 ; A[1] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+8] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+96] add r11, 
rax adc r12, rdx adc r10, 0 ; A[13] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+128], r11 ; A[2] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+16] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+136], r12 ; A[3] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+24] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR 
[r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+144], r10 ; A[4] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+32] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+88] add r11, rax adc r12, 
rdx adc r10, 0 ; A[12] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+152], r11 ; A[5] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+40] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+160], r12 ; A[6] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+48] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+72] add 
r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+168], r10 ; A[7] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+56] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+176], r11 ; A[8] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+64] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[12] mov rax, QWORD PTR [r8+96] mul 
QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+184], r12 ; A[9] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+72] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+192], r10 ; A[10] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+80] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+200], r11 ; A[11] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+88] xor r11, r11 add 
r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+208], r12 ; A[12] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+96] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+216], r10 ; A[13] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+104] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+224], r11 ; A[14] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+112] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+232], r12 ; A[15] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx mov QWORD PTR [rcx+240], r10 mov QWORD PTR [rcx+248], r11 mov rax, QWORD PTR [rsp] mov rdx, QWORD PTR [rsp+8] mov r10, QWORD PTR [rsp+16] mov r11, QWORD PTR [rsp+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], rdx mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rsp+32] mov rdx, QWORD PTR [rsp+40] mov r10, QWORD PTR [rsp+48] mov r11, 
QWORD PTR [rsp+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], rdx mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rsp+64] mov rdx, QWORD PTR [rsp+72] mov r10, QWORD PTR [rsp+80] mov r11, QWORD PTR [rsp+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], rdx mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [rsp+96] mov rdx, QWORD PTR [rsp+104] mov r10, QWORD PTR [rsp+112] mov r11, QWORD PTR [rsp+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], rdx mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 add rsp, 128 pop r12 ret sp_2048_mul_16 ENDP _text ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * ; * r Result of multiplication. ; * a First number to multiply. ; * b Second number to multiply. ; */ _text SEGMENT READONLY PARA sp_2048_mul_avx2_16 PROC push rbx push rbp push r12 push r13 push r14 push r15 push rdi mov rbp, r8 mov r8, rcx mov r9, rdx sub rsp, 128 cmp r9, r8 mov rbx, rsp cmovne rbx, r8 cmp rbp, r8 cmove rbx, rsp add r8, 128 xor rdi, rdi mov rdx, QWORD PTR [r9] ; A[0] * B[0] mulx r11, r10, QWORD PTR [rbp] ; A[0] * B[1] mulx r12, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx], r10 adcx r11, rax ; A[0] * B[2] mulx r13, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+8], r11 adcx r12, rax ; A[0] * B[3] mulx r14, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+16], r12 adcx r13, rax mov QWORD PTR [rbx+24], r13 ; A[0] * B[4] mulx r10, rax, QWORD PTR [rbp+32] adcx r14, rax ; A[0] * B[5] mulx r11, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+32], r14 adcx r10, rax ; A[0] * B[6] mulx r12, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+40], r10 adcx r11, rax ; A[0] * B[7] mulx r13, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+48], r11 adcx r12, rax mov QWORD PTR [rbx+56], r12 ; A[0] * B[8] mulx r14, rax, QWORD PTR [rbp+64] adcx r13, rax ; A[0] * B[9] mulx r10, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+64], r13 adcx r14, rax ; A[0] * B[10] mulx r11, rax, QWORD PTR [rbp+80] mov QWORD 
PTR [rbx+72], r14 adcx r10, rax ; A[0] * B[11] mulx r12, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+80], r10 adcx r11, rax mov QWORD PTR [rbx+88], r11 ; A[0] * B[12] mulx r13, rax, QWORD PTR [rbp+96] adcx r12, rax ; A[0] * B[13] mulx r14, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+96], r12 adcx r13, rax ; A[0] * B[14] mulx r10, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+104], r13 adcx r14, rax ; A[0] * B[15] mulx r11, rax, QWORD PTR [rbp+120] mov QWORD PTR [rbx+112], r14 adcx r10, rax adcx r11, rdi mov r15, rdi adcx r15, rdi mov QWORD PTR [rbx+120], r10 mov QWORD PTR [r8], r11 mov rdx, QWORD PTR [r9+8] mov r11, QWORD PTR [rbx+8] mov r12, QWORD PTR [rbx+16] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] ; A[1] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[1] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+8], r11 adcx r12, rax adox r13, rcx ; A[1] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+16], r12 adcx r13, rax adox r14, rcx ; A[1] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+32], r14 mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] ; A[1] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[1] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[1] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[1] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+64], r13 mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[1] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[1] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[1] * B[10] 
mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[1] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[1] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[1] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[1] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[1] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [rbx+120], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8], r11 mov QWORD PTR [r8+8], r12 mov rdx, QWORD PTR [r9+16] mov r12, QWORD PTR [rbx+16] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] ; A[2] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; A[2] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+16], r12 adcx r13, rax adox r14, rcx ; A[2] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx ; A[2] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+40], r10 mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] ; A[2] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[2] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[2] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[2] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+72], r14 mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD 
PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[2] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[2] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[2] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[2] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[2] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[2] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[2] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[2] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+8], r12 mov QWORD PTR [r8+16], r13 mov rdx, QWORD PTR [r9+24] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] ; A[3] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[3] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx ; A[3] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx ; A[3] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+48], r11 mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] ; A[3] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[3] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[3] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+64], 
r13 adcx r14, rax adox r10, rcx ; A[3] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+80], r10 mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[3] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[3] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[3] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[3] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[3] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[3] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[3] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[3] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+8], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+16], r13 mov QWORD PTR [r8+24], r14 mov rdx, QWORD PTR [r9+32] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] ; A[4] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[4] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx ; A[4] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[4] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+56], r12 mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] ; A[4] * B[4] mulx 
rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[4] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[4] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[4] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+88], r11 mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[4] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[4] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[4] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[4] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[4] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[4] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[4] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[4] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+16], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+24], r14 mov QWORD PTR [r8+32], r10 mov rdx, QWORD PTR [r9+40] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] ; A[5] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[5] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[5] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[5] * B[3] mulx rcx, rax, 
QWORD PTR [rbp+24] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+64], r13 mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[5] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[5] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[5] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[5] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[5] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[5] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[5] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[5] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[5] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[5] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[5] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[5] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+24], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+32], r10 mov QWORD PTR [r8+40], r11 mov rdx, QWORD PTR [r9+48] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] ; A[6] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[6] * 
B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[6] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[6] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+72], r14 mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[6] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[6] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[6] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[6] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[6] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[6] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[6] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[6] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[6] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[6] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[6] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[6] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+32], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+40], r11 mov QWORD PTR [r8+48], r12 mov rdx, QWORD PTR 
[r9+56] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] ; A[7] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; A[7] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[7] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[7] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+80], r10 mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[7] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[7] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[7] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[7] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[7] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[7] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[7] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[7] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] ; A[7] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[7] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[7] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[7] * B[15] mulx 
rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+40], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+48], r12 mov QWORD PTR [r8+56], r13 mov rdx, QWORD PTR [r9+64] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] ; A[8] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[8] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[8] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[8] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+88], r11 mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[8] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[8] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[8] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[8] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[8] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[8] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[8] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[8] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+24], r14 mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[8] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[8] * 
B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[8] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[8] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+48], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+56], r13 mov QWORD PTR [r8+64], r14 mov rdx, QWORD PTR [r9+72] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[9] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[9] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[9] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[9] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[9] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[9] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[9] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[9] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[9] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[9] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[9] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[9] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+24], r14 
adcx r10, rax adox r11, rcx mov QWORD PTR [r8+32], r10 mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] ; A[9] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[9] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[9] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[9] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+56], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+64], r14 mov QWORD PTR [r8+72], r10 mov rdx, QWORD PTR [r9+80] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[10] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[10] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[10] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[10] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[10] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[10] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[10] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[10] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] ; A[10] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[10] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov 
QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[10] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[10] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+40], r11 mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] ; A[10] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[10] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[10] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[10] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+64], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+72], r10 mov QWORD PTR [r8+80], r11 mov rdx, QWORD PTR [r9+88] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[11] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[11] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[11] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[11] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[11] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[11] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[11] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[11] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov 
r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[11] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[11] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[11] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[11] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+48], r12 mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] ; A[11] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[11] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[11] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[11] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+72], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+80], r11 mov QWORD PTR [r8+88], r12 mov rdx, QWORD PTR [r9+96] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[12] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; A[12] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[12] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[12] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[12] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[12] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[12] 
* B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[12] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+24], r14 mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] ; A[12] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[12] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[12] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[12] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+56], r13 mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] ; A[12] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[12] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[12] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx ; A[12] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+80], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+88], r12 mov QWORD PTR [r8+96], r13 mov rdx, QWORD PTR [r9+104] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[13] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[13] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[13] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[13] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD 
PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[13] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[13] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[13] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[13] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [r8+32], r10 mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] ; A[13] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[13] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[13] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[13] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+64], r14 mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] mov r13, QWORD PTR [r8+96] ; A[13] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[13] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx ; A[13] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+80], r11 adcx r12, rax adox r13, rcx ; A[13] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+88], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+96], r13 mov QWORD PTR [r8+104], r14 mov rdx, QWORD PTR [r9+112] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[14] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[14] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[14] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+120], r10 
adcx r11, rax adox r12, rcx ; A[14] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] ; A[14] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[14] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[14] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[14] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+40], r11 mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] ; A[14] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[14] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[14] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[14] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [r8+72], r10 mov r12, QWORD PTR [r8+88] mov r13, QWORD PTR [r8+96] mov r14, QWORD PTR [r8+104] ; A[14] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[14] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+80], r11 adcx r12, rax adox r13, rcx ; A[14] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+88], r12 adcx r13, rax adox r14, rcx ; A[14] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+96], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+104], r14 mov QWORD PTR [r8+112], r10 mov rdx, QWORD PTR [r9+120] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[15] * B[0] mulx rcx, rax, 
QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[15] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[15] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[15] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[15] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[15] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[15] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[15] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+48], r12 mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] ; A[15] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[15] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[15] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[15] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+80], r11 mov r13, QWORD PTR [r8+96] mov r14, QWORD PTR [r8+104] mov r10, QWORD PTR [r8+112] ; A[15] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[15] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+88], r12 adcx r13, rax adox r14, rcx ; A[15] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+96], r13 adcx r14, rax adox r10, rcx ; A[15] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+104], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov QWORD PTR [r8+112], r10 mov QWORD PTR [r8+120], r11 sub 
r8, 128 cmp r9, r8 je L_start_2048_mul_avx2_16 cmp rbp, r8 jne L_end_2048_mul_avx2_16 L_start_2048_mul_avx2_16: vmovdqu xmm0, OWORD PTR [rbx] vmovups OWORD PTR [r8], xmm0 vmovdqu xmm0, OWORD PTR [rbx+16] vmovups OWORD PTR [r8+16], xmm0 vmovdqu xmm0, OWORD PTR [rbx+32] vmovups OWORD PTR [r8+32], xmm0 vmovdqu xmm0, OWORD PTR [rbx+48] vmovups OWORD PTR [r8+48], xmm0 vmovdqu xmm0, OWORD PTR [rbx+64] vmovups OWORD PTR [r8+64], xmm0 vmovdqu xmm0, OWORD PTR [rbx+80] vmovups OWORD PTR [r8+80], xmm0 vmovdqu xmm0, OWORD PTR [rbx+96] vmovups OWORD PTR [r8+96], xmm0 vmovdqu xmm0, OWORD PTR [rbx+112] vmovups OWORD PTR [r8+112], xmm0 L_end_2048_mul_avx2_16: add rsp, 128 pop rdi pop r15 pop r14 pop r13 pop r12 pop rbp pop rbx ret sp_2048_mul_avx2_16 ENDP _text ENDS ENDIF
; /* Add two 16-word (1024-bit) integers. (r = a + b)
;  *
;  * r  Result, 16 words (rcx).
;  * a  First operand, 16 words (rdx).
;  * b  Second operand, 16 words (r8).
;  *
;  * Returns the carry out of the top word (0 or 1) in rax.
;  * Scratch registers: r9, r10.
;  */
_text SEGMENT READONLY PARA
sp_2048_add_16 PROC
        ; rax collects the final carry; clear it before the chain starts
        xor     eax, eax
        ; word 0 starts the carry chain
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [r8]
        ; words 1..15: load a[i], store r[i-1], then add b[i] with carry.
        ; mov does not touch the flags, so the carry survives the stores.
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r10
        adc     r9, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r9
        adc     r10, QWORD PTR [r8+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r10
        adc     r9, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r9
        adc     r10, QWORD PTR [r8+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r10
        adc     r9, QWORD PTR [r8+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r9
        adc     r10, QWORD PTR [r8+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r10
        adc     r9, QWORD PTR [r8+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r9
        adc     r10, QWORD PTR [r8+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r10
        adc     r9, QWORD PTR [r8+72]
        mov     r10, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r9
        adc     r10, QWORD PTR [r8+80]
        mov     r9, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r10
        adc     r9, QWORD PTR [r8+88]
        mov     r10, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+88], r9
        adc     r10, QWORD PTR [r8+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+96], r10
        adc     r9, QWORD PTR [r8+104]
        mov     r10, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+104], r9
        adc     r10, QWORD PTR [r8+112]
        mov     r9, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r10
        adc     r9, QWORD PTR [r8+120]
        mov     QWORD PTR [rcx+120], r9
        ; fold the final carry flag into the return value
        adc     rax, 0
        ret
sp_2048_add_16 ENDP
_text ENDS
; /* Subtract one 32-word (2048-bit) integer from another in place. (a -= b)
;  *
;  * a  Minuend and result, 32 words (rcx).
;  * b  Subtrahend, 32 words (rdx).
;  *
;  * Returns 0 in rax when there is no borrow out of the top word and
;  * all ones (-1) when there is.
;  * Scratch registers: r8, r9.
;  */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_32 PROC
        ; word 0 starts the borrow chain
        mov     r9, QWORD PTR [rcx]
        sub     r9, QWORD PTR [rdx]
        ; words 1..31: load a[i], store a[i-1], then subtract b[i] with borrow
        mov     r8, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r9
        sbb     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rcx+16]
        mov     QWORD PTR [rcx+8], r8
        sbb     r9, QWORD PTR [rdx+16]
        mov     r8, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r9
        sbb     r8, QWORD PTR [rdx+24]
        mov     r9, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], r8
        sbb     r9, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r9
        sbb     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], r8
        sbb     r9, QWORD PTR [rdx+48]
        mov     r8, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r9
        sbb     r8, QWORD PTR [rdx+56]
        mov     r9, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], r8
        sbb     r9, QWORD PTR [rdx+64]
        mov     r8, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r9
        sbb     r8, QWORD PTR [rdx+72]
        mov     r9, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], r8
        sbb     r9, QWORD PTR [rdx+80]
        mov     r8, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r9
        sbb     r8, QWORD PTR [rdx+88]
        mov     r9, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], r8
        sbb     r9, QWORD PTR [rdx+96]
        mov     r8, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r9
        sbb     r8, QWORD PTR [rdx+104]
        mov     r9, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], r8
        sbb     r9, QWORD PTR [rdx+112]
        mov     r8, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r9
        sbb     r8, QWORD PTR [rdx+120]
        mov     r9, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], r8
        sbb     r9, QWORD PTR [rdx+128]
        mov     r8, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r9
        sbb     r8, QWORD PTR [rdx+136]
        mov     r9, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], r8
        sbb     r9, QWORD PTR [rdx+144]
        mov     r8, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r9
        sbb     r8, QWORD PTR [rdx+152]
        mov     r9, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], r8
        sbb     r9, QWORD PTR [rdx+160]
        mov     r8, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r9
        sbb     r8, QWORD PTR [rdx+168]
        mov     r9, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], r8
        sbb     r9, QWORD PTR [rdx+176]
        mov     r8, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r9
        sbb     r8, QWORD PTR [rdx+184]
        mov     r9, QWORD PTR [rcx+192]
        mov     QWORD PTR [rcx+184], r8
        sbb     r9, QWORD PTR [rdx+192]
        mov     r8, QWORD PTR [rcx+200]
        mov     QWORD PTR [rcx+192], r9
        sbb     r8, QWORD PTR [rdx+200]
        mov     r9, QWORD PTR [rcx+208]
        mov     QWORD PTR [rcx+200], r8
        sbb     r9, QWORD PTR [rdx+208]
        mov     r8, QWORD PTR [rcx+216]
        mov     QWORD PTR [rcx+208], r9
        sbb     r8, QWORD PTR [rdx+216]
        mov     r9, QWORD PTR [rcx+224]
        mov     QWORD PTR [rcx+216], r8
        sbb     r9, QWORD PTR [rdx+224]
        mov     r8, QWORD PTR [rcx+232]
        mov     QWORD PTR [rcx+224], r9
        sbb     r8, QWORD PTR [rdx+232]
        mov     r9, QWORD PTR [rcx+240]
        mov     QWORD PTR [rcx+232], r8
        sbb     r9, QWORD PTR [rdx+240]
        mov     r8, QWORD PTR [rcx+248]
        mov     QWORD PTR [rcx+240], r9
        sbb     r8, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+248], r8
        ; rax - rax - CF: 0 when no borrow, -1 when the chain borrowed
        sbb     rax, rax
        ret
sp_2048_sub_in_place_32 ENDP
_text ENDS
; /* Add b to a into r. (r = a + b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  *
;  * Operands are 32 words (2048 bits) each: r in rcx, a in rdx, b in r8.
;  * Returns the carry out of the top word (0 or 1) in rax.
;  * Scratch registers: r9, r10.
;  */
_text SEGMENT READONLY PARA
sp_2048_add_32 PROC
        ; Add
        ; Word 0 starts the carry chain; rax is cleared to receive the
        ; final carry. Each later step loads a[i], stores r[i-1] and then
        ; adds b[i] with carry (mov leaves the flags untouched).
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r10
        adc     r9, QWORD PTR [r8+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r9
        adc     r10, QWORD PTR [r8+24]
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r10
        adc     r9, QWORD PTR [r8+32]
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r9
        adc     r10, QWORD PTR [r8+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r10
        adc     r9, QWORD PTR [r8+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r9
        adc     r10, QWORD PTR [r8+56]
        mov     r9, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r10
        adc     r9, QWORD PTR [r8+64]
        mov     r10, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r9
        adc     r10, QWORD PTR [r8+72]
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r10
        adc     r9, QWORD PTR [r8+80]
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r9
        adc     r10, QWORD PTR [r8+88]
        mov     r9, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+88], r10
        adc     r9, QWORD PTR [r8+96]
        mov     r10, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+96], r9
        adc     r10, QWORD PTR [r8+104]
        mov     r9, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+104], r10
        adc     r9, QWORD PTR [r8+112]
        mov     r10, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r9
        adc     r10, QWORD PTR [r8+120]
        mov     r9, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+120], r10
        adc     r9, QWORD PTR [r8+128]
        mov     r10, QWORD PTR [rdx+136]
        mov     QWORD PTR [rcx+128], r9
        adc     r10, QWORD PTR [r8+136]
        mov     r9, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+136], r10
        adc     r9, QWORD PTR [r8+144]
        mov     r10, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+144], r9
        adc     r10, QWORD PTR [r8+152]
        mov     r9, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+152], r10
        adc     r9, QWORD PTR [r8+160]
        mov     r10, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+160], r9
        adc     r10, QWORD PTR [r8+168]
        mov     r9, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+168], r10
        adc     r9, QWORD PTR [r8+176]
        mov     r10, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+176], r9
        adc     r10, QWORD PTR [r8+184]
        mov     r9, QWORD PTR [rdx+192]
        mov     QWORD PTR [rcx+184], r10
        adc     r9, QWORD PTR [r8+192]
        mov     r10, QWORD PTR [rdx+200]
        mov     QWORD PTR [rcx+192], r9
        adc     r10, QWORD PTR [r8+200]
        mov     r9, QWORD PTR [rdx+208]
        mov     QWORD PTR [rcx+200], r10
        adc     r9, QWORD PTR [r8+208]
        mov     r10, QWORD PTR [rdx+216]
        mov     QWORD PTR [rcx+208], r9
        adc     r10, QWORD PTR [r8+216]
        mov     r9, QWORD PTR [rdx+224]
        mov     QWORD PTR [rcx+216], r10
        adc     r9, QWORD PTR [r8+224]
        mov     r10, QWORD PTR [rdx+232]
        mov     QWORD PTR [rcx+224], r9
        adc     r10, QWORD PTR [r8+232]
        mov     r9, QWORD PTR [rdx+240]
        mov     QWORD PTR [rcx+232], r10
        adc     r9, QWORD PTR [r8+240]
        mov     r10, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r9
        adc     r10, QWORD PTR [r8+248]
        mov     QWORD PTR [rcx+248], r10
        ; fold the final carry flag into the return value
        adc     rax, 0
        ret
sp_2048_add_32 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer. Result pointer (rcx); words 16..63 are
;  *    written directly here, words 0..31 are produced by the third
;  *    sp_2048_mul_16 call.
;  * a  A single precision integer. 32 words are read (rdx).
;  * b  A single precision integer. 32 words are read (r8).
;  *
;  * The operands are split into 16-word halves (L = words 0..15,
;  * H = words 16..31) and combined from three calls to sp_2048_mul_16.
;  * That routine is defined elsewhere; it is assumed to write the 32-word
;  * product of the 16-word values at rdx and r8 to rcx - TODO confirm.
;  *   z1 = (aL + aH) * (bL + bH)  at [rsp+0]   (carries handled separately)
;  *   z2 = aH * bH                at [rsp+256]
;  *   z0 = aL * bL                at r
;  * then r = z0 + (z1 - z2 - z0) * 2^1024 + z2 * 2^2048.
;  *
;  * Stack frame (808 bytes below the six saved registers):
;  *   [rsp+512]  low 16 words of aL + aH    [rsp+792]  carry of aL + aH
;  *   [rsp+640]  low 16 words of bL + bH    [rsp+800]  carry of bL + bH
;  *   [rsp+768]  saved r    [rsp+776]  saved a    [rsp+784]  saved b
;  *
;  * Saves and restores r12-r15, rdi and rsi. No value is placed in rax
;  * for the caller.
;  */
_text SEGMENT READONLY PARA
sp_2048_mul_32 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        sub     rsp, 808
        ; keep the three arguments; rcx/rdx/r8 are volatile across the calls
        mov     QWORD PTR [rsp+768], rcx
        mov     QWORD PTR [rsp+776], rdx
        mov     QWORD PTR [rsp+784], r8
        ; [rsp+512] = aL + aH (16 words), r15 = carry out
        lea     r12, QWORD PTR [rsp+512]
        lea     r14, QWORD PTR [rdx+128]
        ; Add
        mov     rax, QWORD PTR [rdx]
        xor     r15, r15
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [r12], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [r12+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r12+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [r12+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [r12+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r12+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [r12+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [r12+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r12+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [r12+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [r12+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     rax, QWORD PTR [rdx+96]
        mov     QWORD PTR [r12+88], r10
        adc     rax, QWORD PTR [r14+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     QWORD PTR [r12+96], rax
        adc     r9, QWORD PTR [r14+104]
        mov     r10, QWORD PTR [rdx+112]
        mov     QWORD PTR [r12+104], r9
        adc     r10, QWORD PTR [r14+112]
        mov     rax, QWORD PTR [rdx+120]
        mov     QWORD PTR [r12+112], r10
        adc     rax, QWORD PTR [r14+120]
        mov     QWORD PTR [r12+120], rax
        adc     r15, 0
        mov     QWORD PTR [rsp+792], r15
        ; [rsp+640] = bL + bH (16 words), rdi = carry out
        lea     r13, QWORD PTR [rsp+640]
        lea     r14, QWORD PTR [r8+128]
        ; Add
        mov     rax, QWORD PTR [r8]
        xor     rdi, rdi
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [r8+8]
        mov     QWORD PTR [r13], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [r8+16]
        mov     QWORD PTR [r13+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [r8+24]
        mov     QWORD PTR [r13+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [r8+32]
        mov     QWORD PTR [r13+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [r8+40]
        mov     QWORD PTR [r13+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [r8+48]
        mov     QWORD PTR [r13+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [r8+56]
        mov     QWORD PTR [r13+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [r8+64]
        mov     QWORD PTR [r13+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [r8+72]
        mov     QWORD PTR [r13+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [r8+80]
        mov     QWORD PTR [r13+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [r8+88]
        mov     QWORD PTR [r13+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     rax, QWORD PTR [r8+96]
        mov     QWORD PTR [r13+88], r10
        adc     rax, QWORD PTR [r14+96]
        mov     r9, QWORD PTR [r8+104]
        mov     QWORD PTR [r13+96], rax
        adc     r9, QWORD PTR [r14+104]
        mov     r10, QWORD PTR [r8+112]
        mov     QWORD PTR [r13+104], r9
        adc     r10, QWORD PTR [r14+112]
        mov     rax, QWORD PTR [r8+120]
        mov     QWORD PTR [r13+112], r10
        adc     rax, QWORD PTR [r14+120]
        mov     QWORD PTR [r13+120], rax
        adc     rdi, 0
        mov     QWORD PTR [rsp+800], rdi
        ; z1: the two 16-word sums are passed in, output buffer is [rsp]
        mov     r8, r13
        mov     rdx, r12
        mov     rcx, rsp
        call    sp_2048_mul_16
        ; z2: high halves (a + 128 bytes, b + 128 bytes), output [rsp+256]
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        lea     rcx, QWORD PTR [rsp+256]
        add     r8, 128
        add     rdx, 128
        call    sp_2048_mul_16
        ; z0: low halves, output goes straight to r
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        mov     rcx, QWORD PTR [rsp+768]
        call    sp_2048_mul_16
IFDEF _WIN64
        ; the argument registers are volatile in the Microsoft x64
        ; convention, so reload them (rcx is used again below)
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        mov     rcx, QWORD PTR [rsp+768]
ENDIF
        ; Carry fix-up for z1. r11 = carry_a AND carry_b; negating the
        ; carries turns them into all-zero / all-one masks.
        mov     r15, QWORD PTR [rsp+792]
        mov     rdi, QWORD PTR [rsp+800]
        mov     rsi, QWORD PTR [rsp+768]
        mov     r11, r15
        lea     r12, QWORD PTR [rsp+512]
        lea     r13, QWORD PTR [rsp+640]
        and     r11, rdi
        neg     r15
        neg     rdi
        add     rsi, 256
        ; sum_a is kept only if the b-sum carried (mask rdi);
        ; sum_b is kept only if the a-sum carried (mask r15)
        mov     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [r13]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12], rax
        mov     QWORD PTR [r13], r9
        mov     rax, QWORD PTR [r12+8]
        mov     r9, QWORD PTR [r13+8]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+8], rax
        mov     QWORD PTR [r13+8], r9
        mov     rax, QWORD PTR [r12+16]
        mov     r9, QWORD PTR [r13+16]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+16], rax
        mov     QWORD PTR [r13+16], r9
        mov     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [r13+24]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+24], rax
        mov     QWORD PTR [r13+24], r9
        mov     rax, QWORD PTR [r12+32]
        mov     r9, QWORD PTR [r13+32]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+32], rax
        mov     QWORD PTR [r13+32], r9
        mov     rax, QWORD PTR [r12+40]
        mov     r9, QWORD PTR [r13+40]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+40], rax
        mov     QWORD PTR [r13+40], r9
        mov     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [r13+48]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+48], rax
        mov     QWORD PTR [r13+48], r9
        mov     rax, QWORD PTR [r12+56]
        mov     r9, QWORD PTR [r13+56]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+56], rax
        mov     QWORD PTR [r13+56], r9
        mov     rax, QWORD PTR [r12+64]
        mov     r9, QWORD PTR [r13+64]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+64], rax
        mov     QWORD PTR [r13+64], r9
        mov     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [r13+72]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+72], rax
        mov     QWORD PTR [r13+72], r9
        mov     rax, QWORD PTR [r12+80]
        mov     r9, QWORD PTR [r13+80]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+80], rax
        mov     QWORD PTR [r13+80], r9
        mov     rax, QWORD PTR [r12+88]
        mov     r9, QWORD PTR [r13+88]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+88], rax
        mov     QWORD PTR [r13+88], r9
        mov     rax, QWORD PTR [r12+96]
        mov     r9, QWORD PTR [r13+96]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+96], rax
        mov     QWORD PTR [r13+96], r9
        mov     rax, QWORD PTR [r12+104]
        mov     r9, QWORD PTR [r13+104]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+104], rax
        mov     QWORD PTR [r13+104], r9
        mov     rax, QWORD PTR [r12+112]
        mov     r9, QWORD PTR [r13+112]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+112], rax
        mov     QWORD PTR [r13+112], r9
        mov     rax, QWORD PTR [r12+120]
        mov     r9, QWORD PTR [r13+120]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+120], rax
        mov     QWORD PTR [r13+120], r9
        ; r[32..47] (rsi = r + 256 bytes) = masked sum_a + masked sum_b;
        ; the carry out is accumulated in r11
        mov     rax, QWORD PTR [r12]
        add     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r13+120]
        mov     QWORD PTR [rsi+120], rax
        adc     r11, 0
        ; z1 -= z2 (32 words, in place at [rsp]); borrow comes out of r11
        lea     r13, QWORD PTR [rsp+256]
        mov     r12, rsp
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [r13+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [r13+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [r13+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [r13+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [r13+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [r13+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [r13+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [r13+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [r13+184]
        mov     rax, QWORD PTR [r12+192]
        mov     QWORD PTR [r12+184], r10
        sbb     rax, QWORD PTR [r13+192]
        mov     r9, QWORD PTR [r12+200]
        mov     QWORD PTR [r12+192], rax
        sbb     r9, QWORD PTR [r13+200]
        mov     r10, QWORD PTR [r12+208]
        mov     QWORD PTR [r12+200], r9
        sbb     r10, QWORD PTR [r13+208]
        mov     rax, QWORD PTR [r12+216]
        mov     QWORD PTR [r12+208], r10
        sbb     rax, QWORD PTR [r13+216]
        mov     r9, QWORD PTR [r12+224]
        mov     QWORD PTR [r12+216], rax
        sbb     r9, QWORD PTR [r13+224]
        mov     r10, QWORD PTR [r12+232]
        mov     QWORD PTR [r12+224], r9
        sbb     r10, QWORD PTR [r13+232]
        mov     rax, QWORD PTR [r12+240]
        mov     QWORD PTR [r12+232], r10
        sbb     rax, QWORD PTR [r13+240]
        mov     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [r12+240], rax
        sbb     r9, QWORD PTR [r13+248]
        mov     QWORD PTR [r12+248], r9
        sbb     r11, 0
        ; z1 -= z0 (z0 is the low 32 words at r = rcx); borrow out of r11
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [rcx]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [rcx+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [rcx+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [rcx+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [rcx+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [rcx+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [rcx+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [rcx+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [rcx+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [rcx+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [rcx+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [rcx+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [rcx+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [rcx+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [rcx+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [rcx+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [rcx+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [rcx+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [rcx+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [rcx+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [rcx+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [rcx+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [rcx+184]
        mov     rax, QWORD PTR [r12+192]
        mov     QWORD PTR [r12+184], r10
        sbb     rax, QWORD PTR [rcx+192]
        mov     r9, QWORD PTR [r12+200]
        mov     QWORD PTR [r12+192], rax
        sbb     r9, QWORD PTR [rcx+200]
        mov     r10, QWORD PTR [r12+208]
        mov     QWORD PTR [r12+200], r9
        sbb     r10, QWORD PTR [rcx+208]
        mov     rax, QWORD PTR [r12+216]
        mov     QWORD PTR [r12+208], r10
        sbb     rax, QWORD PTR [rcx+216]
        mov     r9, QWORD PTR [r12+224]
        mov     QWORD PTR [r12+216], rax
        sbb     r9, QWORD PTR [rcx+224]
        mov     r10, QWORD PTR [r12+232]
        mov     QWORD PTR [r12+224], r9
        sbb     r10, QWORD PTR [rcx+232]
        mov     rax, QWORD PTR [r12+240]
        mov     QWORD PTR [r12+232], r10
        sbb     rax, QWORD PTR [rcx+240]
        mov     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [r12+240], rax
        sbb     r9, QWORD PTR [rcx+248]
        mov     QWORD PTR [r12+248], r9
        sbb     r11, 0
        ; r[16..47] += z1 (rsi steps back to r + 128 bytes); carry into r11
        sub     rsi, 128
        ; Add
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r12+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r12+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r12+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r12+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r12+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r12+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r12+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r12+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r12+96]
        mov     r9, QWORD PTR [rsi+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r12+104]
        mov     r10, QWORD PTR [rsi+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r12+112]
        mov     rax, QWORD PTR [rsi+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r12+120]
        mov     r9, QWORD PTR [rsi+128]
        mov     QWORD PTR [rsi+120], rax
        adc     r9, QWORD PTR [r12+128]
        mov     r10, QWORD PTR [rsi+136]
        mov     QWORD PTR [rsi+128], r9
        adc     r10, QWORD PTR [r12+136]
        mov     rax, QWORD PTR [rsi+144]
        mov     QWORD PTR [rsi+136], r10
        adc     rax, QWORD PTR [r12+144]
        mov     r9, QWORD PTR [rsi+152]
        mov     QWORD PTR [rsi+144], rax
        adc     r9, QWORD PTR [r12+152]
        mov     r10, QWORD PTR [rsi+160]
        mov     QWORD PTR [rsi+152], r9
        adc     r10, QWORD PTR [r12+160]
        mov     rax, QWORD PTR [rsi+168]
        mov     QWORD PTR [rsi+160], r10
        adc     rax, QWORD PTR [r12+168]
        mov     r9, QWORD PTR [rsi+176]
        mov     QWORD PTR [rsi+168], rax
        adc     r9, QWORD PTR [r12+176]
        mov     r10, QWORD PTR [rsi+184]
        mov     QWORD PTR [rsi+176], r9
        adc     r10, QWORD PTR [r12+184]
        mov     rax, QWORD PTR [rsi+192]
        mov     QWORD PTR [rsi+184], r10
        adc     rax, QWORD PTR [r12+192]
        mov     r9, QWORD PTR [rsi+200]
        mov     QWORD PTR [rsi+192], rax
        adc     r9, QWORD PTR [r12+200]
        mov     r10, QWORD PTR [rsi+208]
        mov     QWORD PTR [rsi+200], r9
        adc     r10, QWORD PTR [r12+208]
        mov     rax, QWORD PTR [rsi+216]
        mov     QWORD PTR [rsi+208], r10
        adc     rax, QWORD PTR [r12+216]
        mov     r9, QWORD PTR [rsi+224]
        mov     QWORD PTR [rsi+216], rax
        adc     r9, QWORD PTR [r12+224]
        mov     r10, QWORD PTR [rsi+232]
        mov     QWORD PTR [rsi+224], r9
        adc     r10, QWORD PTR [r12+232]
        mov     rax, QWORD PTR [rsi+240]
        mov     QWORD PTR [rsi+232], r10
        adc     rax, QWORD PTR [r12+240]
        mov     r9, QWORD PTR [rsi+248]
        mov     QWORD PTR [rsi+240], rax
        adc     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [rsi+248], r9
        adc     r11, 0
        ; the accumulated carry/borrow word becomes r[48] (byte offset 384)
        mov     QWORD PTR [rcx+384], r11
        ; r[32..48] += z2[0..16] (rsi back at r + 256 bytes, r13 = z2)
        add     rsi, 128
        ; Add
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [rsi+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [rsi+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [rsi+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r13+120]
        mov     r9, QWORD PTR [rsi+128]
        mov     QWORD PTR [rsi+120], rax
        adc     r9, QWORD PTR [r13+128]
        mov     QWORD PTR [rsi+128], r9
        ; r[49..63] have not been written yet: store z2[17..31] plus the
        ; carry that is still propagating
        ; Add to zero
        mov     rax, QWORD PTR [r13+136]
        adc     rax, 0
        mov     r9, QWORD PTR [r13+144]
        mov     QWORD PTR [rsi+136], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+152]
        mov     QWORD PTR [rsi+144], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+160]
        mov     QWORD PTR [rsi+152], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+168]
        mov     QWORD PTR [rsi+160], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+176]
        mov     QWORD PTR [rsi+168], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+184]
        mov     QWORD PTR [rsi+176], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+192]
        mov     QWORD PTR [rsi+184], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+200]
        mov     QWORD PTR [rsi+192], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+208]
        mov     QWORD PTR [rsi+200], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+216]
        mov     QWORD PTR [rsi+208], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+224]
        mov     QWORD PTR [rsi+216], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+232]
        mov     QWORD PTR [rsi+224], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+240]
        mov     QWORD PTR [rsi+232], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+248]
        mov     QWORD PTR [rsi+240], r9
        adc     r10, 0
        mov     QWORD PTR [rsi+248], r10
        ; release the frame and restore the saved registers
        add     rsp, 808
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mul_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
; */
; Implementation notes:
;   Arguments arrive as rcx = r, rdx = a, r8 = b and are saved on the stack.
;   The 32-word operands are split into 16-word halves and combined
;   Karatsuba-style from three calls to sp_2048_mul_avx2_16 (defined
;   elsewhere; presumably a 16x16 -> 32 word multiply - confirm there):
;     z1 = (a_lo + a_hi) * (b_lo + b_hi)   -> [rsp+0   .. rsp+255]
;     z2 = a_hi * b_hi                     -> [rsp+256 .. rsp+511]
;     z0 = a_lo * b_lo                     -> r[0..31]
;   Stack frame (808 bytes):
;     [rsp+512 .. rsp+639]  a_lo + a_hi (16 words)
;     [rsp+640 .. rsp+767]  b_lo + b_hi (16 words)
;     [rsp+768], [rsp+776], [rsp+784]  saved r, a, b pointers
;     [rsp+792], [rsp+800]  carry out of the a sum / of the b sum (0 or 1)
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_32 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        sub     rsp, 808
        ; Save r, a and b; the argument registers are reused below.
        mov     QWORD PTR [rsp+768], rcx
        mov     QWORD PTR [rsp+776], rdx
        mov     QWORD PTR [rsp+784], r8
        ; r12 -> a sum buffer, r14 -> upper half of a.
        lea     r12, QWORD PTR [rsp+512]
        lea     r14, QWORD PTR [rdx+128]
        ; Add
        ; [r12] = a[0..15] + a[16..31]; r15 receives the carry out.
        mov     rax, QWORD PTR [rdx]
        xor     r15, r15
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [r12], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [r12+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r12+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [r12+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [r12+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r12+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [r12+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [r12+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r12+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [r12+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [r12+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     rax, QWORD PTR [rdx+96]
        mov     QWORD PTR [r12+88], r10
        adc     rax, QWORD PTR [r14+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     QWORD PTR [r12+96], rax
        adc     r9, QWORD PTR [r14+104]
        mov     r10, QWORD PTR [rdx+112]
        mov     QWORD PTR [r12+104], r9
        adc     r10, QWORD PTR [r14+112]
        mov     rax, QWORD PTR [rdx+120]
        mov     QWORD PTR [r12+112], r10
        adc     rax, QWORD PTR [r14+120]
        mov     QWORD PTR [r12+120], rax
        adc     r15, 0
        mov     QWORD PTR [rsp+792], r15
        ; r13 -> b sum buffer, r14 -> upper half of b.
        lea     r13, QWORD PTR [rsp+640]
        lea     r14, QWORD PTR [r8+128]
        ; Add
        ; [r13] = b[0..15] + b[16..31]; rdi receives the carry out.
        mov     rax, QWORD PTR [r8]
        xor     rdi, rdi
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [r8+8]
        mov     QWORD PTR [r13], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [r8+16]
        mov     QWORD PTR [r13+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [r8+24]
        mov     QWORD PTR [r13+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [r8+32]
        mov     QWORD PTR [r13+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [r8+40]
        mov     QWORD PTR [r13+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [r8+48]
        mov     QWORD PTR [r13+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [r8+56]
        mov     QWORD PTR [r13+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [r8+64]
        mov     QWORD PTR [r13+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [r8+72]
        mov     QWORD PTR [r13+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [r8+80]
        mov     QWORD PTR [r13+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [r8+88]
        mov     QWORD PTR [r13+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     rax, QWORD PTR [r8+96]
        mov     QWORD PTR [r13+88], r10
        adc     rax, QWORD PTR [r14+96]
        mov     r9, QWORD PTR [r8+104]
        mov     QWORD PTR [r13+96], rax
        adc     r9, QWORD PTR [r14+104]
        mov     r10, QWORD PTR [r8+112]
        mov     QWORD PTR [r13+104], r9
        adc     r10, QWORD PTR [r14+112]
        mov     rax, QWORD PTR [r8+120]
        mov     QWORD PTR [r13+112], r10
        adc     rax, QWORD PTR [r14+120]
        mov     QWORD PTR [r13+120], rax
        adc     rdi, 0
        mov     QWORD PTR [rsp+800], rdi
        ; z1: multiply the two half sums into [rsp+0 .. rsp+255].
        mov     r8, r13
        mov     rdx, r12
        mov     rcx, rsp
        call    sp_2048_mul_avx2_16
        ; z2: multiply the upper halves of a and b into [rsp+256 .. rsp+511].
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        lea     rcx, QWORD PTR [rsp+256]
        add     r8, 128
        add     rdx, 128
        call    sp_2048_mul_avx2_16
        ; z0: multiply the lower halves of a and b into r[0..31].
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        mov     rcx, QWORD PTR [rsp+768]
        call    sp_2048_mul_avx2_16
IFDEF _WIN64
        ; rcx, rdx and r8 are volatile across a call in the Windows x64 ABI;
        ; reload them (rcx = r is used again below).
        mov     r8, QWORD PTR [rsp+784]
        mov     rdx, QWORD PTR [rsp+776]
        mov     rcx, QWORD PTR [rsp+768]
ENDIF
        ; Correct z1 for the carries dropped from the 16-word half sums.
        ; r11 = carry(a sum) AND carry(b sum); from here on it accumulates
        ; the word that is later stored at [rcx+384] (r[48]).
        ; neg turns each 0/1 carry into a 0/all-ones mask.
        mov     r15, QWORD PTR [rsp+792]
        mov     rdi, QWORD PTR [rsp+800]
        mov     rsi, QWORD PTR [rsp+768]
        mov     r11, r15
        lea     r12, QWORD PTR [rsp+512]
        lea     r13, QWORD PTR [rsp+640]
        and     r11, rdi
        neg     r15
        neg     rdi
        add     rsi, 256
        ; r[32..47] = (a sum masked by the b carry) + (b sum masked by the a carry).
        ; pext with an all-ones mask returns the source unchanged and with a
        ; zero mask returns 0, so each pext keeps or clears a whole word
        ; without branching.
        mov     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [r13]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        add     rax, r9
        mov     r9, QWORD PTR [r12+8]
        mov     r10, QWORD PTR [r13+8]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+16]
        mov     rax, QWORD PTR [r13+16]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+8], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [r13+24]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+16], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+32]
        mov     r10, QWORD PTR [r13+32]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+24], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+40]
        mov     rax, QWORD PTR [r13+40]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+32], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [r13+48]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+40], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+56]
        mov     r10, QWORD PTR [r13+56]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+48], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+64]
        mov     rax, QWORD PTR [r13+64]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+56], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [r13+72]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+64], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+80]
        mov     r10, QWORD PTR [r13+80]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+72], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+88]
        mov     rax, QWORD PTR [r13+88]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+80], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+96]
        mov     r9, QWORD PTR [r13+96]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+88], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+104]
        mov     r10, QWORD PTR [r13+104]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+96], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+112]
        mov     rax, QWORD PTR [r13+112]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+104], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+120]
        mov     r9, QWORD PTR [r13+120]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+112], r10
        adc     rax, r9
        mov     QWORD PTR [rsi+120], rax
        adc     r11, 0
        ; z1 -= z2 (32 words, in place at [rsp]); the borrow comes off r11.
        lea     r13, QWORD PTR [rsp+256]
        mov     r12, rsp
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [r13+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [r13+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [r13+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [r13+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [r13+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [r13+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [r13+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [r13+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [r13+184]
        mov     rax, QWORD PTR [r12+192]
        mov     QWORD PTR [r12+184], r10
        sbb     rax, QWORD PTR [r13+192]
        mov     r9, QWORD PTR [r12+200]
        mov     QWORD PTR [r12+192], rax
        sbb     r9, QWORD PTR [r13+200]
        mov     r10, QWORD PTR [r12+208]
        mov     QWORD PTR [r12+200], r9
        sbb     r10, QWORD PTR [r13+208]
        mov     rax, QWORD PTR [r12+216]
        mov     QWORD PTR [r12+208], r10
        sbb     rax, QWORD PTR [r13+216]
        mov     r9, QWORD PTR [r12+224]
        mov     QWORD PTR [r12+216], rax
        sbb     r9, QWORD PTR [r13+224]
        mov     r10, QWORD PTR [r12+232]
        mov     QWORD PTR [r12+224], r9
        sbb     r10, QWORD PTR [r13+232]
        mov     rax, QWORD PTR [r12+240]
        mov     QWORD PTR [r12+232], r10
        sbb     rax, QWORD PTR [r13+240]
        mov     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [r12+240], rax
        sbb     r9, QWORD PTR [r13+248]
        mov     QWORD PTR [r12+248], r9
        sbb     r11, 0
        ; z1 -= z0 (z0 is r[0..31] at [rcx]); the borrow comes off r11.
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [rcx]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [rcx+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [rcx+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [rcx+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [rcx+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [rcx+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [rcx+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [rcx+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [rcx+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [rcx+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [rcx+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [rcx+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [rcx+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [rcx+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [rcx+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [rcx+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [rcx+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [rcx+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [rcx+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [rcx+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [rcx+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [rcx+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [rcx+184]
        mov     rax, QWORD PTR [r12+192]
        mov     QWORD PTR [r12+184], r10
        sbb     rax, QWORD PTR [rcx+192]
        mov     r9, QWORD PTR [r12+200]
        mov     QWORD PTR [r12+192], rax
        sbb     r9, QWORD PTR [rcx+200]
        mov     r10, QWORD PTR [r12+208]
        mov     QWORD PTR [r12+200], r9
        sbb     r10, QWORD PTR [rcx+208]
        mov     rax, QWORD PTR [r12+216]
        mov     QWORD PTR [r12+208], r10
        sbb     rax, QWORD PTR [rcx+216]
        mov     r9, QWORD PTR [r12+224]
        mov     QWORD PTR [r12+216], rax
        sbb     r9, QWORD PTR [rcx+224]
        mov     r10, QWORD PTR [r12+232]
        mov     QWORD PTR [r12+224], r9
        sbb     r10, QWORD PTR [rcx+232]
        mov     rax, QWORD PTR [r12+240]
        mov     QWORD PTR [r12+232], r10
        sbb     rax, QWORD PTR [rcx+240]
        mov     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [r12+240], rax
        sbb     r9, QWORD PTR [rcx+248]
        mov     QWORD PTR [r12+248], r9
        sbb     r11, 0
        ; rsi moves back from r+256 to r+128 (r[16]).
        sub     rsi, 128
        ; Add
        ; r[16..47] += z1; the carry out goes into r11, which becomes r[48].
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r12+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r12+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r12+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r12+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r12+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r12+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r12+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r12+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r12+96]
        mov     r9, QWORD PTR [rsi+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r12+104]
        mov     r10, QWORD PTR [rsi+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r12+112]
        mov     rax, QWORD PTR [rsi+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r12+120]
        mov     r9, QWORD PTR [rsi+128]
        mov     QWORD PTR [rsi+120], rax
        adc     r9, QWORD PTR [r12+128]
        mov     r10, QWORD PTR [rsi+136]
        mov     QWORD PTR [rsi+128], r9
        adc     r10, QWORD PTR [r12+136]
        mov     rax, QWORD PTR [rsi+144]
        mov     QWORD PTR [rsi+136], r10
        adc     rax, QWORD PTR [r12+144]
        mov     r9, QWORD PTR [rsi+152]
        mov     QWORD PTR [rsi+144], rax
        adc     r9, QWORD PTR [r12+152]
        mov     r10, QWORD PTR [rsi+160]
        mov     QWORD PTR [rsi+152], r9
        adc     r10, QWORD PTR [r12+160]
        mov     rax, QWORD PTR [rsi+168]
        mov     QWORD PTR [rsi+160], r10
        adc     rax, QWORD PTR [r12+168]
        mov     r9, QWORD PTR [rsi+176]
        mov     QWORD PTR [rsi+168], rax
        adc     r9, QWORD PTR [r12+176]
        mov     r10, QWORD PTR [rsi+184]
        mov     QWORD PTR [rsi+176], r9
        adc     r10, QWORD PTR [r12+184]
        mov     rax, QWORD PTR [rsi+192]
        mov     QWORD PTR [rsi+184], r10
        adc     rax, QWORD PTR [r12+192]
        mov     r9, QWORD PTR [rsi+200]
        mov     QWORD PTR [rsi+192], rax
        adc     r9, QWORD PTR [r12+200]
        mov     r10, QWORD PTR [rsi+208]
        mov     QWORD PTR [rsi+200], r9
        adc     r10, QWORD PTR [r12+208]
        mov     rax, QWORD PTR [rsi+216]
        mov     QWORD PTR [rsi+208], r10
        adc     rax, QWORD PTR [r12+216]
        mov     r9, QWORD PTR [rsi+224]
        mov     QWORD PTR [rsi+216], rax
        adc     r9, QWORD PTR [r12+224]
        mov     r10, QWORD PTR [rsi+232]
        mov     QWORD PTR [rsi+224], r9
        adc     r10, QWORD PTR [r12+232]
        mov     rax, QWORD PTR [rsi+240]
        mov     QWORD PTR [rsi+232], r10
        adc     rax, QWORD PTR [r12+240]
        mov     r9, QWORD PTR [rsi+248]
        mov     QWORD PTR [rsi+240], rax
        adc     r9, QWORD PTR [r12+248]
        mov     QWORD PTR [rsi+248], r9
        adc     r11, 0
        mov     QWORD PTR [rcx+384], r11
        ; rsi forward again to r+256 (r[32]).
        add     rsi, 128
        ; Add
        ; r[32..48] += z2[0..16] (17 words, including the carry word r[48]).
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [rsi+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [rsi+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [rsi+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r13+120]
        mov     r9, QWORD PTR [rsi+128]
        mov     QWORD PTR [rsi+120], rax
        adc     r9, QWORD PTR [r13+128]
        mov     QWORD PTR [rsi+128], r9
        ; Add to zero
        ; r[49..63] = z2[17..31] + carry; these words of r are written here
        ; without being read first.
        mov     rax, QWORD PTR [r13+136]
        adc     rax, 0
        mov     r9, QWORD PTR [r13+144]
        mov     QWORD PTR [rsi+136], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+152]
        mov     QWORD PTR [rsi+144], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+160]
        mov     QWORD PTR [rsi+152], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+168]
        mov     QWORD PTR [rsi+160], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+176]
        mov     QWORD PTR [rsi+168], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+184]
        mov     QWORD PTR [rsi+176], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+192]
        mov     QWORD PTR [rsi+184], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+200]
        mov     QWORD PTR [rsi+192], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+208]
        mov     QWORD PTR [rsi+200], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+216]
        mov     QWORD PTR [rsi+208], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+224]
        mov     QWORD PTR [rsi+216], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+232]
        mov     QWORD PTR [rsi+224], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+240]
        mov     QWORD PTR [rsi+232], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+248]
        mov     QWORD PTR [rsi+240], r9
        adc     r10, 0
        mov     QWORD PTR [rsi+248], r10
        ; Release the frame and restore the saved registers in reverse push order.
        add     rsp, 808
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mul_avx2_32 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
; Implementation notes:
;   rcx = r, rdx = a on entry; a is copied to r8 because mul writes rdx:rax.
;   Result words are produced one at a time: word k sums every A[i] * A[j]
;   with i + j = k, counting each product with i != j twice.
;   r9, r10 and r11 rotate as the three-word running accumulator.
;   Result words 0..15 are staged in a 128-byte stack buffer and copied to r
;   at the end; words 16..31 are stored directly to [rcx+128 .. rcx+248].
_text SEGMENT READONLY PARA
sp_2048_sqr_16 PROC
        push    r12
        push    r13
        push    r14
        mov     r8, rdx
        sub     rsp, 128
        ; Result word 0
        ; A[0] * A[0]
        mov     rax, QWORD PTR [r8]
        mul     rax
        xor     r11, r11
        mov     QWORD PTR [rsp], rax
        mov     r10, rdx
        ; Result word 1 (the product is added twice)
        ; A[0] * A[1]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r8]
        xor     r9, r9
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        mov     QWORD PTR [rsp+8], r10
        ; Result word 2
        ; A[0] * A[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r8]
        xor     r10, r10
        add     r11, rax
        adc     r9, rdx
        adc     r10, 0
        add     r11, rax
        adc     r9, rdx
        adc     r10, 0
        ; A[1] * A[1]
        mov     rax, QWORD PTR [r8+8]
        mul     rax
        add     r11, rax
        adc     r9, rdx
        adc     r10, 0
        mov     QWORD PTR [rsp+16], r11
        ; Result word 3
        ; A[0] * A[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r8]
        xor     r11, r11
        add     r9, rax
        adc     r10, rdx
        adc     r11, 0
        add     r9, rax
        adc     r10, rdx
        adc     r11, 0
        ; A[1] * A[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r8+8]
        add     r9, rax
        adc     r10, rdx
        adc     r11, 0
        add     r9, rax
        adc     r10, rdx
        adc     r11, 0
        mov     QWORD PTR [rsp+24], r9
        ; Result word 4
        ; A[0] * A[4]
        mov     rax, QWORD PTR [r8+32]
        mul     QWORD PTR [r8]
        xor     r9, r9
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        ; A[1] * A[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r8+8]
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        add     r10, rax
        adc     r11, rdx
        adc     r9, 0
        ; A[2] * A[2]
        mov     rax, QWORD PTR [r8+16]
mul rax add r10, rax adc r11, rdx adc r9, 0 mov QWORD PTR [rsp+32], r10 ; A[0] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+40], r11 ; A[0] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[3] mov rax, QWORD PTR [r8+24] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+48], r9 ; A[0] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+56], r10 ; A[0] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[4] mov rax, QWORD PTR [r8+32] mul rax add r12, r12 adc r13, r13 adc r14, 
r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+64], r11 ; A[0] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+72], r9 ; A[0] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[5] mov rax, QWORD PTR [r8+40] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+80], r10 ; A[0] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+40] 
add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+88], r11 ; A[0] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[6] mov rax, QWORD PTR [r8+48] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+96], r9 ; A[0] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+104], r10 ; A[0] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[12] mov 
rax, QWORD PTR [r8+96] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[7] mov rax, QWORD PTR [r8+56] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+112], r11 ; A[0] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+120], r9 ; A[1] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[2] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+32] add r12, rax adc 
r13, rdx adc r14, 0 ; A[5] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[8] mov rax, QWORD PTR [r8+64] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+128], r10 ; A[2] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+16] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[3] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+136], r11 ; A[3] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+24] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[4] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[9] mov rax, QWORD 
PTR [r8+72] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rcx+144], r9 ; A[4] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+32] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[5] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+152], r10 ; A[5] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+40] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[6] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[10] mov rax, QWORD PTR [r8+80] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+160], r11 ; A[6] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+48] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[7] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+72] add 
r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rcx+168], r9 ; A[7] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+56] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[8] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[11] mov rax, QWORD PTR [r8+88] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+176], r10 ; A[8] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+64] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[9] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+184], r11 ; A[9] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+72] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[10] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 ; A[12] * A[12] mov rax, QWORD PTR [r8+96] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rcx+192], r9 ; A[10] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+80] xor r9, r9 xor r14, r14 mov r12, 
rax mov r13, rdx ; A[11] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 ; A[12] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+96] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+200], r10 ; A[11] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+88] xor r10, r10 add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 ; A[12] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+96] add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 ; A[13] * A[13] mov rax, QWORD PTR [r8+104] mul rax add r11, rax adc r9, rdx adc r10, 0 mov QWORD PTR [rcx+208], r11 ; A[12] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+96] xor r11, r11 add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 ; A[13] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+104] add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+216], r9 ; A[13] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+104] xor r9, r9 add r10, rax adc r11, rdx adc r9, 0 add r10, rax adc r11, rdx adc r9, 0 ; A[14] * A[14] mov rax, QWORD PTR [r8+112] mul rax add r10, rax adc r11, rdx adc r9, 0 mov QWORD PTR [rcx+224], r10 ; A[14] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+112] xor r10, r10 add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 mov QWORD PTR [rcx+232], r11 ; A[15] * A[15] mov rax, QWORD PTR [r8+120] mul rax add r9, rax adc r10, rdx mov QWORD PTR [rcx+240], r9 mov QWORD PTR [rcx+248], r10 mov rax, QWORD PTR [rsp] mov rdx, QWORD PTR [rsp+8] mov r12, QWORD PTR [rsp+16] mov r13, QWORD PTR [rsp+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], rdx mov QWORD PTR [rcx+16], r12 mov QWORD PTR [rcx+24], r13 mov rax, QWORD PTR [rsp+32] mov rdx, QWORD PTR [rsp+40] mov r12, QWORD PTR [rsp+48] mov r13, QWORD PTR [rsp+56] mov QWORD PTR [rcx+32], rax mov 
QWORD PTR [rcx+40], rdx
        ; (tail of sp_2048_sqr_16) keep copying result words from the stack
        ; area at [rsp] out to r, four words at a time.
        mov QWORD PTR [rcx+48], r12
        mov QWORD PTR [rcx+56], r13
        mov rax, QWORD PTR [rsp+64]
        mov rdx, QWORD PTR [rsp+72]
        mov r12, QWORD PTR [rsp+80]
        mov r13, QWORD PTR [rsp+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], rdx
        mov QWORD PTR [rcx+80], r12
        mov QWORD PTR [rcx+88], r13
        mov rax, QWORD PTR [rsp+96]
        mov rdx, QWORD PTR [rsp+104]
        mov r12, QWORD PTR [rsp+112]
        mov r13, QWORD PTR [rsp+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], rdx
        mov QWORD PTR [rcx+112], r12
        mov QWORD PTR [rcx+120], r13
        ; Release the 128-byte stack area and restore the saved registers.
        add rsp, 128
        pop r14
        pop r13
        pop r12
        ret
sp_2048_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  *
;  * On entry rcx = r and rdx = a. a is read as 16 64-bit words
;  * ([a] .. [a+120]); r receives 32 words ([r] .. [r+248]).
;  *
;  * Register roles after the prologue:
;  *   r9   a (rdx is needed as the implicit mulx multiplicand).
;  *   r8   r + 128, i.e. the address of result word 16.
;  *   rbp  where result words 0..11 are accumulated: r itself, or the
;  *        128-byte stack area when r == a, so that a is not overwritten
;  *        while it is still being read. In that case the words are copied
;  *        to r at the very end.
;  *   r15, rdi, rsi, rbx
;  *        result words 12..15, kept in registers until the end.
;  *   r13  constant zero; adding it with adcx/adox folds a carry flag in.
;  *   r14  carry word handed from one diagonal to the next.
;  *   r10, r11, r12, rax, rcx
;  *        rotating temporaries.
;  *
;  * Method: every product A[i] x A[j] with i > j is added once, using two
;  * independent carry chains (adcx propagates through CF, adox through OF).
;  * The sum is then doubled and the squares A[i] x A[i] are added.
;  *
;  * Uses the mulx, adcx and adox instructions.
;  *
;  * The "Zero into", "No load" and "No store" notes below appear to be left
;  * over from the code generator; the registers they name do not match this
;  * listing (for example r9 holds a here and is never zeroed).
;  */
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_16 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov r8, rcx
        mov r9, rdx
        sub rsp, 128
        ; rbp = stack area when r == a, otherwise r.
        cmp r9, r8
        mov rbp, rsp
        cmovne rbp, r8
        add r8, 128
        ; r13 = 0; this also clears CF and OF before the first carry chains.
        xor r13, r13
        ; Diagonal 1
        ; Products A[1..15] x A[0], giving result words 1..16.
        ; Zero into %r9
        ; Zero into %r10
        ; A[1] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx r11, r10, QWORD PTR [r9+8]
        ; A[2] x A[0]
        mulx r12, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [rbp+8], r10
        mov QWORD PTR [rbp+16], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[3] x A[0]
        mulx r10, rax, QWORD PTR [r9+24]
        adcx r12, rax
        adox r10, r13
        ; A[4] x A[0]
        mulx r11, rax, QWORD PTR [r9+32]
        adcx r10, rax
        adox r11, r13
        mov QWORD PTR [rbp+24], r12
        mov QWORD PTR [rbp+32], r10
        ; Zero into %r10
        ; Zero into %r8
        ; A[5] x A[0]
        mulx r12, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r12, r13
        ; A[6] x A[0]
        mulx r10, rax, QWORD PTR [r9+48]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [rbp+40], r11
        mov QWORD PTR [rbp+48], r12
        ; Zero into %r9
        ; Zero into %r10
        ; A[7] x A[0]
        mulx r11, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, r13
        ; A[8] x A[0]
        mulx r12, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [rbp+56], r10
        mov QWORD PTR [rbp+64], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[9] x A[0]
        mulx r10, rax, QWORD PTR [r9+72]
        adcx r12, rax
        adox r10, r13
        ; A[10] x A[0]
        mulx r11, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, r13
        mov QWORD PTR [rbp+72], r12
        mov QWORD PTR [rbp+80], r10
        ; No load %r13 - %r10
        ; A[11] x A[0]
        ; From here the high halves land in r15, rdi, rsi, rbx (words 12..15).
        mulx r15, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r15, r13
        ; A[12] x A[0]
        mulx rdi, rax, QWORD PTR [r9+96]
        adcx r15, rax
        adox rdi, r13
        mov QWORD PTR [rbp+88], r11
        ; No store %r13 - %r10
        ; No load %r15 - %r9
        ; A[13] x A[0]
        mulx rsi, rax, QWORD PTR [r9+104]
        adcx rdi, rax
        adox rsi, r13
        ; A[14] x A[0]
        mulx rbx, rax, QWORD PTR [r9+112]
        adcx rsi, rax
        adox rbx, r13
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[0]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx rbx, rax
        adox r10, r13
        ; No store %rbx - %r10
        ; Carry
        ; Top word takes the pending CF; r14 = CF + OF, which leaves both
        ; flags clear for the next diagonal.
        adcx r10, r13
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8], r10
        ; Diagonal 2
        ; Products A[2..15] x A[1], then A[15] x A[2]; added in from result
        ; word 3 upward.
        mov r10, QWORD PTR [rbp+24]
        mov r11, QWORD PTR [rbp+32]
        mov r12, QWORD PTR [rbp+40]
        ; A[2] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, QWORD PTR [r9+16]
        adcx r10, rax
        adox r11, rcx
        ; A[3] x A[1]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+24], r10
        mov QWORD PTR [rbp+32], r11
        mov r10, QWORD PTR [rbp+48]
        mov r11, QWORD PTR [rbp+56]
        ; A[4] x A[1]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r12, rax
        adox r10, rcx
        ; A[5] x A[1]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+40], r12
        mov QWORD PTR [rbp+48], r10
        mov r12, QWORD PTR [rbp+64]
        mov r10, QWORD PTR [rbp+72]
        ; A[6] x A[1]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r12, rcx
        ; A[7] x A[1]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbp+56], r11
        mov QWORD PTR [rbp+64], r12
        mov r11, QWORD PTR [rbp+80]
        mov r12, QWORD PTR [rbp+88]
        ; A[8] x A[1]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, rcx
        ; A[9] x A[1]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+72], r10
        mov QWORD PTR [rbp+80], r11
        ; No load %r13 - %r8
        ; A[10] x A[1]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r12, rax
        adox r15, rcx
        ; A[11] x A[1]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r12
        ; No store %r13 - %r8
        ; No load %r15 - %r10
        ; A[12] x A[1]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rdi, rax
        adox rsi, rcx
        ; A[13] x A[1]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r9
        ; No store %r15 - %r10
        mov r11, QWORD PTR [r8]
        ; Zero into %r10
        ; A[14] x A[1]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx rbx, rax
        adox r11, rcx
        ; A[15] x A[1]
        mulx r12, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, r13
        ; No store %rbx - %r8
        mov QWORD PTR [r8], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+8], r12
        ; Carry
        ; Add the carry word from the previous diagonal, then recompute r14.
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+16], r10
        ; Diagonal 3
        ; Products A[3..14] x A[2], then A[14] x A[3..5]; added in from
        ; result word 5 upward. rdx still holds A[2] from A[15] x A[2] above.
        mov r10, QWORD PTR [rbp+40]
        mov r11, QWORD PTR [rbp+48]
        mov r12, QWORD PTR [rbp+56]
        ; A[3] x A[2]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r10, rax
        adox r11, rcx
        ; A[4] x A[2]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+40], r10
        mov QWORD PTR [rbp+48], r11
        mov r10, QWORD PTR [rbp+64]
        mov r11, QWORD PTR [rbp+72]
        ; A[5] x A[2]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r12, rax
        adox r10, rcx
        ; A[6] x A[2]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+56], r12
        mov QWORD PTR [rbp+64], r10
        mov r12, QWORD PTR [rbp+80]
        mov r10, QWORD PTR [rbp+88]
        ; A[7] x A[2]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r12, rcx
        ; A[8] x A[2]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbp+72], r11
        mov QWORD PTR [rbp+80], r12
        ; No load %r13 - %r9
        ; A[9] x A[2]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r10, rax
        adox r15, rcx
        ; A[10] x A[2]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r10
        ; No store %r13 - %r9
        ; No load %r15 - %r8
        ; A[11] x A[2]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rdi, rax
        adox rsi, rcx
        ; A[12] x A[2]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r10
        ; No store %r15 - %r8
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        ; A[13] x A[2]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx rbx, rax
        adox r12, rcx
        ; A[14] x A[2]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        ; No store %rbx - %r9
        mov QWORD PTR [r8], r12
        mov r11, QWORD PTR [r8+16]
        ; Zero into %r10
        ; A[14] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r12, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+8], r10
        mov QWORD PTR [r8+16], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[14] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx r10, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+24], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+32], r10
        ; Diagonal 4
        ; Products A[4..13] x A[3], then A[13] x A[4..8]; added in from
        ; result word 7 upward.
        mov r10, QWORD PTR [rbp+56]
        mov r11, QWORD PTR [rbp+64]
        mov r12, QWORD PTR [rbp+72]
        ; A[4] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r10, rax
        adox r11, rcx
        ; A[5] x A[3]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+56], r10
        mov QWORD PTR [rbp+64], r11
        mov r10, QWORD PTR [rbp+80]
        mov r11, QWORD PTR [rbp+88]
        ; A[6] x A[3]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r12, rax
        adox r10, rcx
        ; A[7] x A[3]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+72], r12
        mov QWORD PTR [rbp+80], r10
        ; No load %r13 - %r10
        ; A[8] x A[3]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r15, rcx
        ; A[9] x A[3]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r11
        ; No store %r13 - %r10
        ; No load %r15 - %r9
        ; A[10] x A[3]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rdi, rax
        adox rsi, rcx
        ; A[11] x A[3]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[12] x A[3]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rbx, rax
        adox r10, rcx
        ; A[13] x A[3]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        ; No store %rbx - %r10
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[13] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, rcx
        ; A[13] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        ; Zero into %r10
        ; A[13] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        ; A[13] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx r12, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[13] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx r10, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+40], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+48], r10
        ; Diagonal 5
        ; Products A[5..12] x A[4], then A[12] x A[5..11]; added in from
        ; result word 9 upward.
        mov r10, QWORD PTR [rbp+72]
        mov r11, QWORD PTR [rbp+80]
        mov r12, QWORD PTR [rbp+88]
        ; A[5] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r10, rax
        adox r11, rcx
        ; A[6] x A[4]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+72], r10
        mov QWORD PTR [rbp+80], r11
        ; No load %r13 - %r8
        ; A[7] x A[4]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r12, rax
        adox r15, rcx
        ; A[8] x A[4]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r12
        ; No store %r13 - %r8
        ; No load %r15 - %r10
        ; A[9] x A[4]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rdi, rax
        adox rsi, rcx
        ; A[10] x A[4]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r9
        ; No store %r15 - %r10
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[11] x A[4]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rbx, rax
        adox r11, rcx
        ; A[12] x A[4]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, rcx
        ; No store %rbx - %r8
        mov QWORD PTR [r8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[12] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, rcx
        ; A[12] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+8], r12
        mov QWORD PTR [r8+16], r10
        mov r12, QWORD PTR [r8+32]
        mov r10, QWORD PTR [r8+40]
        ; A[12] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, rcx
        ; A[12] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+24], r11
        mov QWORD PTR [r8+32], r12
        mov r11, QWORD PTR [r8+48]
        ; Zero into %r10
        ; A[12] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r10, rax
        adox r11, rcx
        ; A[12] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx r12, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+40], r10
        mov QWORD PTR [r8+48], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[12] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx r10, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+56], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+64], r10
        ; Diagonal 6
        ; Products A[6..11] x A[5], A[11] x A[6..10], then A[13] x A[9..12];
        ; added in from result word 11 upward.
        mov r10, QWORD PTR [rbp+88]
        ; No load %r13 - %r9
        ; A[6] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r15, rcx
        ; A[7] x A[5]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r10
        ; No store %r13 - %r9
        ; No load %r15 - %r8
        ; A[8] x A[5]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rdi, rax
        adox rsi, rcx
        ; A[9] x A[5]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r10
        ; No store %r15 - %r8
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        ; A[10] x A[5]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rbx, rax
        adox r12, rcx
        ; A[11] x A[5]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r12, rax
        adox r10, rcx
        ; No store %rbx - %r9
        mov QWORD PTR [r8], r12
        mov r11, QWORD PTR [r8+16]
        mov r12, QWORD PTR [r8+24]
        ; A[11] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r10, rax
        adox r11, rcx
        ; A[11] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r10
        mov QWORD PTR [r8+16], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        ; A[11] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r12, rax
        adox r10, rcx
        ; A[11] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r12
        mov QWORD PTR [r8+32], r10
        mov r12, QWORD PTR [r8+48]
        mov r10, QWORD PTR [r8+56]
        ; A[11] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r12, rcx
        ; A[13] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+40], r11
        mov QWORD PTR [r8+48], r12
        mov r11, QWORD PTR [r8+64]
        ; Zero into %r10
        ; A[13] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        ; A[13] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx r12, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+56], r10
        mov QWORD PTR [r8+64], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[13] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx r10, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+72], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+80], r10
        ; Diagonal 7
        ; Products A[7..10] x A[6], A[10] x A[7..9], then A[14] x A[6..13];
        ; added in from result word 13 (rdi) upward.
        ; No load %r15 - %r9
        ; A[7] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx rdi, rax
        adox rsi, rcx
        ; A[8] x A[6]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r14 - %r8
        ; No store %r15 - %r9
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[9] x A[6]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rbx, rax
        adox r10, rcx
        ; A[10] x A[6]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, rcx
        ; No store %rbx - %r10
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[10] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r11, rax
        adox r12, rcx
        ; A[10] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[10] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[14] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        ; A[14] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+40], r12
        mov QWORD PTR [r8+48], r10
        mov r12, QWORD PTR [r8+64]
        mov r10, QWORD PTR [r8+72]
        ; A[14] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, rcx
        ; A[14] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+56], r11
        mov QWORD PTR [r8+64], r12
        mov r11, QWORD PTR [r8+80]
        ; Zero into %r10
        ; A[14] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx r12, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+72], r10
        mov QWORD PTR [r8+80], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[14] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx r10, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+88], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+96], r10
        ; Diagonal 8
        ; Products A[8..9] x A[7], A[9] x A[8], then A[15] x A[3..14];
        ; added in from result word 15 (rbx) upward.
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[8] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rbx, rax
        adox r11, rcx
        ; A[9] x A[7]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r11, rax
        adox r12, rcx
        ; No store %rbx - %r8
        mov QWORD PTR [r8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[9] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r12, rax
        adox r10, rcx
        ; A[15] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+8], r12
        mov QWORD PTR [r8+16], r10
        mov r12, QWORD PTR [r8+32]
        mov r10, QWORD PTR [r8+40]
        ; A[15] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        ; A[15] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+24], r11
        mov QWORD PTR [r8+32], r12
        mov r11, QWORD PTR [r8+48]
        mov r12, QWORD PTR [r8+56]
        ; A[15] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        ; A[15] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+40], r10
        mov QWORD PTR [r8+48], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        ; A[15] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        ; A[15] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+56], r12
        mov QWORD PTR [r8+64], r10
        mov r12, QWORD PTR [r8+80]
        mov r10, QWORD PTR [r8+88]
        ; A[15] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        ; A[15] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+72], r11
        mov QWORD PTR [r8+80], r12
        mov r11, QWORD PTR [r8+96]
        ; Zero into %r10
        ; A[15] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        ; A[15] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx r12, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+88], r10
        mov QWORD PTR [r8+96], r11
        ; Zero into %r8
        ; Zero into %r9
        ; A[15] x A[14]
        mov rdx, QWORD PTR [r9+112]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+104], r12
        ; Carry
        ; The final carry word becomes result word 31.
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+112], r10
        mov QWORD PTR [r8+120], r14
        ; Double and Add in A[i] x A[i]
        ; Each step handles two result words: adox x, x doubles them (the
        ; bit shifted out travels in OF) and adcx adds the low/high halves of
        ; A[i]^2 (carry travels in CF). Both flags are clear on entry from the
        ; carry step above.
        mov r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx rcx, rax, rdx
        ; Word 0 is just the low half of A[0]^2.
        mov QWORD PTR [rbp], rax
        adox r11, r11
        adcx r11, rcx
        mov QWORD PTR [rbp+8], r11
        mov r10, QWORD PTR [rbp+16]
        mov r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+16], r10
        mov QWORD PTR [rbp+24], r11
        mov r10, QWORD PTR [rbp+32]
        mov r11, QWORD PTR [rbp+40]
        ; A[2] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+32], r10
        mov QWORD PTR [rbp+40], r11
        mov r10, QWORD PTR [rbp+48]
        mov r11, QWORD PTR [rbp+56]
        ; A[3] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+48], r10
        mov QWORD PTR [rbp+56], r11
        mov r10, QWORD PTR [rbp+64]
        mov r11, QWORD PTR [rbp+72]
        ; A[4] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+64], r10
        mov QWORD PTR [rbp+72], r11
        mov r10, QWORD PTR [rbp+80]
        mov r11, QWORD PTR [rbp+88]
        ; A[5] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+80], r10
        mov QWORD PTR [rbp+88], r11
        ; A[6] x A[6]
        ; Words 12..15 are still in r15, rdi, rsi, rbx.
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, rdx
        adox r15, r15
        adox rdi, rdi
        adcx r15, rax
        adcx rdi, rcx
        ; A[7] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, rdx
        adox rsi, rsi
        adox rbx, rbx
        adcx rsi, rax
        adcx rbx, rcx
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[8] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8], r10
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[9] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+16], r10
        mov QWORD PTR [r8+24], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        ; A[10] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+32], r10
        mov QWORD PTR [r8+40], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[11] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+48], r10
        mov QWORD PTR [r8+56], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        ; A[12] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+64], r10
        mov QWORD PTR [r8+72], r11
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        ; A[13] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+80], r10
        mov QWORD PTR [r8+88], r11
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        ; A[14] x A[14]
        mov rdx, QWORD PTR [r9+112]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+96], r10
        mov QWORD PTR [r8+104], r11
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        ; A[15] x A[15]
        mov rdx, QWORD PTR [r9+120]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+112], r10
        mov QWORD PTR [r8+120], r11
        ; Store result words 12..15 from their registers (r8 is r + 128 here).
        mov QWORD PTR [r8+-32], r15
        mov QWORD PTR [r8+-24], rdi
        mov QWORD PTR [r8+-16], rsi
        mov QWORD PTR [r8+-8], rbx
        ; Back to r8 = r. If r != a the low words were written straight to r
        ; and there is nothing to copy.
        sub r8, 128
        cmp r9, r8
        jne L_end_2048_sqr_avx2_16
        ; r == a: copy the low result words from the stack area ([rbp]) to r,
        ; 16 bytes at a time.
        vmovdqu xmm0, OWORD PTR [rbp]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu
xmm0, OWORD PTR [rbp+16]
        ; (tail of sp_2048_sqr_avx2_16) continue the 16-byte copies from
        ; [rbp] to [r8], offsets 16..80.
        vmovups OWORD PTR [r8+16], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+32]
        vmovups OWORD PTR [r8+32], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+48]
        vmovups OWORD PTR [r8+48], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+64]
        vmovups OWORD PTR [r8+64], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+80]
        vmovups OWORD PTR [r8+80], xmm0
L_end_2048_sqr_avx2_16:
        ; Release the 128-byte stack area and restore the saved registers.
        add rsp, 128
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_2048_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  *
;  * On entry rcx = r and rdx = a. a is read as two 16-word halves, al at
;  * [a] and ah at [a+128]; r receives 64 words ([r] .. [r+504]).
;  *
;  * Stack frame (272 bytes):
;  *   [rsp+0 .. rsp+255]  scratch t
;  *   [rsp+256]           saved r
;  *   [rsp+264]           saved a
;  *
;  * Steps:
;  *   1. t[0..15] = |al - ah|  (subtract, then negate if the subtract
;  *      borrowed).
;  *   2. t = t^2 in place, r[32..63] = ah^2, r[0..31] = al^2, each via
;  *      sp_2048_sqr_16.
;  *   3. t = t - ah^2 - al^2, with r9 counting the borrows.
;  *   4. r[16..47] -= t, then the net carry is added into r[48..63].
;  *   This yields al^2 + 2^1024 * (al^2 + ah^2 - (al - ah)^2) + 2^2048 * ah^2.
;  *
;  * Only rax, rcx, rdx and r8-r11 are used here, and the pointers are
;  * reloaded from the stack after every call. rcx and rdx hold r and a
;  * again on return.
;  *
;  * NOTE(review): the frame is a plain "sub rsp, 272" with no pushes, so if
;  * rsp is 8 modulo 16 at entry (the usual state right after a call) it is
;  * still 8 modulo 16 at each call below, and no separate 32-byte home area
;  * is reserved under t. Presumably acceptable because the callee is the
;  * hand-written sp_2048_sqr_16 - confirm against that routine.
;  */
_text SEGMENT READONLY PARA
sp_2048_sqr_32 PROC
        sub rsp, 272
        mov QWORD PTR [rsp+256], rcx
        mov QWORD PTR [rsp+264], rdx
        ; r9 collects the borrow of al - ah; r10 = t, r11 = ah.
        mov r9, 0
        mov r10, rsp
        lea r11, QWORD PTR [rdx+128]
        ; t[0..15] = al - ah. Loads and stores are interleaved with the
        ; sub/sbb chain; neither mov nor lea disturbs CF.
        mov rax, QWORD PTR [rdx]
        sub rax, QWORD PTR [r11]
        mov r8, QWORD PTR [rdx+8]
        mov QWORD PTR [r10], rax
        sbb r8, QWORD PTR [r11+8]
        mov rax, QWORD PTR [rdx+16]
        mov QWORD PTR [r10+8], r8
        sbb rax, QWORD PTR [r11+16]
        mov r8, QWORD PTR [rdx+24]
        mov QWORD PTR [r10+16], rax
        sbb r8, QWORD PTR [r11+24]
        mov rax, QWORD PTR [rdx+32]
        mov QWORD PTR [r10+24], r8
        sbb rax, QWORD PTR [r11+32]
        mov r8, QWORD PTR [rdx+40]
        mov QWORD PTR [r10+32], rax
        sbb r8, QWORD PTR [r11+40]
        mov rax, QWORD PTR [rdx+48]
        mov QWORD PTR [r10+40], r8
        sbb rax, QWORD PTR [r11+48]
        mov r8, QWORD PTR [rdx+56]
        mov QWORD PTR [r10+48], rax
        sbb r8, QWORD PTR [r11+56]
        mov rax, QWORD PTR [rdx+64]
        mov QWORD PTR [r10+56], r8
        sbb rax, QWORD PTR [r11+64]
        mov r8, QWORD PTR [rdx+72]
        mov QWORD PTR [r10+64], rax
        sbb r8, QWORD PTR [r11+72]
        mov rax, QWORD PTR [rdx+80]
        mov QWORD PTR [r10+72], r8
        sbb rax, QWORD PTR [r11+80]
        mov r8, QWORD PTR [rdx+88]
        mov QWORD PTR [r10+80], rax
        sbb r8, QWORD PTR [r11+88]
        mov rax, QWORD PTR [rdx+96]
        mov QWORD PTR [r10+88], r8
        sbb rax, QWORD PTR [r11+96]
        mov r8, QWORD PTR [rdx+104]
        mov QWORD PTR [r10+96], rax
        sbb r8, QWORD PTR [r11+104]
        mov rax, QWORD PTR [rdx+112]
        mov QWORD PTR [r10+104], r8
        sbb rax, QWORD PTR [r11+112]
        mov r8, QWORD PTR [rdx+120]
        mov QWORD PTR [r10+112], rax
        sbb r8, QWORD PTR [r11+120]
        mov QWORD PTR [r10+120], r8
        ; r9 = 0 if al >= ah, all ones if the subtraction borrowed.
        sbb r9, 0
        ; Cond Negate
        ; t = (t xor r9) - r9: unchanged when r9 = 0, two's-complement
        ; negation when r9 is all ones. r11 carries the +1 from word to word
        ; (setc rewrites only its low byte; the upper bits are already zero).
        mov rax, QWORD PTR [r10]
        mov r11, r9
        xor rax, r9
        neg r11
        sub rax, r9
        mov r8, QWORD PTR [r10+8]
        sbb r11, 0
        mov QWORD PTR [r10], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+16]
        setc r11b
        mov QWORD PTR [r10+8], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+24]
        setc r11b
        mov QWORD PTR [r10+16], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+32]
        setc r11b
        mov QWORD PTR [r10+24], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+40]
        setc r11b
        mov QWORD PTR [r10+32], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+48]
        setc r11b
        mov QWORD PTR [r10+40], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+56]
        setc r11b
        mov QWORD PTR [r10+48], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+64]
        setc r11b
        mov QWORD PTR [r10+56], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+72]
        setc r11b
        mov QWORD PTR [r10+64], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+80]
        setc r11b
        mov QWORD PTR [r10+72], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+88]
        setc r11b
        mov QWORD PTR [r10+80], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+96]
        setc r11b
        mov QWORD PTR [r10+88], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+104]
        setc r11b
        mov QWORD PTR [r10+96], rax
        xor r8, r9
        add r8, r11
        mov rax, QWORD PTR [r10+112]
        setc r11b
        mov QWORD PTR [r10+104], r8
        xor rax, r9
        add rax, r11
        mov r8, QWORD PTR [r10+120]
        setc r11b
        mov QWORD PTR [r10+112], rax
        xor r8, r9
        add r8, r11
        mov QWORD PTR [r10+120], r8
        ; t = t^2, squared in place (source and destination are both rsp).
        mov rdx, r10
        mov rcx, rsp
        call sp_2048_sqr_16
        ; r[32..63] = ah^2
        mov rdx, QWORD PTR [rsp+264]
        mov rcx, QWORD PTR [rsp+256]
        add rdx, 128
        add rcx, 256
        call sp_2048_sqr_16
        ; r[0..31] = al^2
        mov rdx, QWORD PTR [rsp+264]
        mov rcx, QWORD PTR [rsp+256]
        call sp_2048_sqr_16
IFDEF _WIN64
        ; Reload of a and r after the call. rdx is overwritten by the next
        ; instruction and rcx is reloaded again before its next use.
        mov rdx, QWORD PTR [rsp+264]
        mov rcx, QWORD PTR [rsp+256]
ENDIF
        ; rdx = r + 384 and r10 = t + 128, so the -128..+120 displacements
        ; below cover r[32..63] and t[0..31].
        mov rdx, QWORD PTR [rsp+256]
        lea r10, QWORD PTR [rsp+128]
        add rdx, 384
        mov r9, 0
        ; t -= ah^2
        mov r8, QWORD PTR [r10+-128]
        sub r8, QWORD PTR [rdx+-128]
        mov rax, QWORD PTR [r10+-120]
        mov QWORD PTR [r10+-128], r8
        sbb rax, QWORD PTR [rdx+-120]
        mov r8, QWORD PTR [r10+-112]
        mov QWORD PTR [r10+-120], rax
        sbb r8, QWORD PTR [rdx+-112]
        mov rax, QWORD PTR [r10+-104]
        mov QWORD PTR [r10+-112], r8
        sbb rax, QWORD PTR [rdx+-104]
        mov r8, QWORD PTR [r10+-96]
        mov QWORD PTR [r10+-104], rax
        sbb r8, QWORD PTR [rdx+-96]
        mov rax, QWORD PTR [r10+-88]
        mov QWORD PTR [r10+-96], r8
        sbb rax, QWORD PTR [rdx+-88]
        mov r8, QWORD PTR [r10+-80]
        mov QWORD PTR [r10+-88], rax
        sbb r8, QWORD PTR [rdx+-80]
        mov rax, QWORD PTR [r10+-72]
        mov QWORD PTR [r10+-80], r8
        sbb rax, QWORD PTR [rdx+-72]
        mov r8, QWORD PTR [r10+-64]
        mov QWORD PTR [r10+-72], rax
        sbb r8, QWORD PTR [rdx+-64]
        mov rax, QWORD PTR [r10+-56]
        mov QWORD PTR [r10+-64], r8
        sbb rax, QWORD PTR [rdx+-56]
        mov r8, QWORD PTR [r10+-48]
        mov QWORD PTR [r10+-56], rax
        sbb r8, QWORD PTR [rdx+-48]
        mov rax, QWORD PTR [r10+-40]
        mov QWORD PTR [r10+-48], r8
        sbb rax, QWORD PTR [rdx+-40]
        mov r8, QWORD PTR [r10+-32]
        mov QWORD PTR [r10+-40], rax
        sbb r8, QWORD PTR [rdx+-32]
        mov rax, QWORD PTR [r10+-24]
        mov QWORD PTR [r10+-32], r8
        sbb rax, QWORD PTR [rdx+-24]
        mov r8, QWORD PTR [r10+-16]
        mov QWORD PTR [r10+-24], rax
        sbb r8, QWORD PTR [rdx+-16]
        mov rax, QWORD PTR [r10+-8]
        mov QWORD PTR [r10+-16], r8
        sbb rax, QWORD PTR [rdx+-8]
        mov r8, QWORD PTR [r10]
        mov QWORD PTR [r10+-8], rax
        sbb r8, QWORD PTR [rdx]
        mov rax, QWORD PTR [r10+8]
        mov QWORD PTR [r10], r8
        sbb rax, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [r10+16]
        mov QWORD PTR [r10+8], rax
        sbb r8, QWORD PTR [rdx+16]
        mov rax, QWORD PTR [r10+24]
        mov QWORD PTR [r10+16], r8
        sbb rax, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [r10+32]
        mov QWORD PTR [r10+24], rax
        sbb r8, QWORD PTR [rdx+32]
        mov rax, QWORD PTR [r10+40]
        mov QWORD PTR [r10+32], r8
        sbb rax, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [r10+48]
        mov QWORD PTR [r10+40], rax
        sbb r8, QWORD PTR [rdx+48]
        mov rax, QWORD PTR [r10+56]
        mov QWORD PTR [r10+48], r8
        sbb rax, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [r10+64]
        mov QWORD PTR [r10+56], rax
        sbb r8, QWORD PTR [rdx+64]
        mov rax, QWORD PTR [r10+72]
        mov QWORD PTR [r10+64], r8
        sbb rax, QWORD PTR [rdx+72]
        mov r8, QWORD PTR [r10+80]
        mov QWORD PTR [r10+72], rax
        sbb r8, QWORD PTR [rdx+80]
        mov rax, QWORD PTR [r10+88]
        mov QWORD PTR [r10+80], r8
        sbb rax, QWORD PTR [rdx+88]
        mov r8, QWORD PTR [r10+96]
        mov QWORD PTR [r10+88], rax
        sbb r8, QWORD PTR [rdx+96]
        mov rax, QWORD PTR [r10+104]
        mov QWORD PTR [r10+96], r8
        sbb rax, QWORD PTR [rdx+104]
        mov r8, QWORD PTR [r10+112]
        mov QWORD PTR [r10+104], rax
        sbb r8, QWORD PTR [rdx+112]
        mov rax, QWORD PTR [r10+120]
        mov QWORD PTR [r10+112], r8
        sbb rax, QWORD PTR [rdx+120]
        mov QWORD PTR [r10+120], rax
        sbb r9, 0
        ; rdx = r + 128, so the same displacements now cover r[0..31].
        sub rdx, 256
        ; t -= al^2
        mov r8, QWORD PTR [r10+-128]
        sub r8, QWORD PTR [rdx+-128]
        mov rax, QWORD PTR [r10+-120]
        mov QWORD PTR [r10+-128], r8
        sbb rax, QWORD PTR [rdx+-120]
        mov r8, QWORD PTR [r10+-112]
        mov QWORD PTR [r10+-120], rax
        sbb r8, QWORD PTR [rdx+-112]
        mov rax, QWORD PTR [r10+-104]
        mov QWORD PTR [r10+-112], r8
        sbb rax, QWORD PTR [rdx+-104]
        mov r8, QWORD PTR [r10+-96]
        mov QWORD PTR [r10+-104], rax
        sbb r8, QWORD PTR [rdx+-96]
        mov rax, QWORD PTR [r10+-88]
        mov QWORD PTR [r10+-96], r8
        sbb rax, QWORD PTR [rdx+-88]
        mov r8, QWORD PTR [r10+-80]
        mov QWORD PTR [r10+-88], rax
        sbb r8, QWORD PTR [rdx+-80]
        mov rax, QWORD PTR [r10+-72]
        mov QWORD PTR [r10+-80], r8
        sbb rax, QWORD PTR [rdx+-72]
        mov r8, QWORD PTR [r10+-64]
        mov QWORD PTR [r10+-72], rax
        sbb r8, QWORD PTR [rdx+-64]
        mov rax, QWORD PTR [r10+-56]
        mov QWORD PTR [r10+-64], r8
        sbb rax, QWORD PTR [rdx+-56]
        mov r8, QWORD PTR [r10+-48]
        mov QWORD PTR [r10+-56], rax
        sbb r8, QWORD PTR [rdx+-48]
        mov rax, QWORD PTR [r10+-40]
        mov QWORD PTR [r10+-48], r8
        sbb rax, QWORD PTR [rdx+-40]
        mov r8, QWORD PTR [r10+-32]
        mov QWORD PTR [r10+-40], rax
        sbb r8, QWORD PTR [rdx+-32]
        mov rax, QWORD PTR [r10+-24]
        mov QWORD PTR [r10+-32], r8
        sbb rax, QWORD PTR [rdx+-24]
        mov r8, QWORD PTR [r10+-16]
        mov QWORD PTR [r10+-24], rax
        sbb r8, QWORD PTR [rdx+-16]
        mov rax, QWORD PTR [r10+-8]
        mov QWORD PTR [r10+-16], r8
        sbb rax, QWORD PTR [rdx+-8]
        mov r8, QWORD PTR [r10]
        mov QWORD PTR [r10+-8], rax
        sbb r8, QWORD PTR [rdx]
        mov rax, QWORD PTR [r10+8]
        mov QWORD PTR [r10], r8
        sbb rax, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [r10+16]
        mov QWORD PTR [r10+8], rax
        sbb r8, QWORD PTR [rdx+16]
        mov rax, QWORD PTR [r10+24]
        mov QWORD PTR [r10+16], r8
        sbb rax, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [r10+32]
        mov QWORD PTR [r10+24], rax
        sbb r8, QWORD PTR [rdx+32]
        mov rax, QWORD PTR [r10+40]
        mov QWORD PTR [r10+32], r8
        sbb rax, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [r10+48]
        mov QWORD PTR [r10+40], rax
        sbb r8, QWORD PTR [rdx+48]
        mov rax, QWORD PTR [r10+56]
        mov QWORD PTR [r10+48], r8
        sbb rax, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [r10+64]
        mov QWORD PTR [r10+56], rax
        sbb r8, QWORD PTR [rdx+64]
        mov rax, QWORD PTR [r10+72]
        mov QWORD PTR [r10+64], r8
        sbb rax, QWORD PTR [rdx+72]
        mov r8, QWORD PTR [r10+80]
        mov QWORD PTR [r10+72], rax
        sbb r8, QWORD PTR [rdx+80]
        mov rax, QWORD PTR [r10+88]
        mov QWORD PTR [r10+80], r8
        sbb rax, QWORD PTR [rdx+88]
        mov r8, QWORD PTR [r10+96]
        mov QWORD PTR [r10+88], rax
        sbb r8, QWORD PTR [rdx+96]
        mov rax, QWORD PTR [r10+104]
        mov QWORD PTR [r10+96], r8
        sbb rax, QWORD PTR [rdx+104]
        mov r8, QWORD PTR [r10+112]
        mov QWORD PTR [r10+104], rax
        sbb r8, QWORD PTR [rdx+112]
        mov rax, QWORD PTR [r10+120]
        mov QWORD PTR [r10+112], r8
        sbb rax, QWORD PTR [rdx+120]
        mov QWORD PTR [r10+120], rax
        sbb r9, 0
        ; r9 = number of borrows from the two subtractions above (made
        ; positive by the neg); rcx = r + 256 so the displacements cover
        ; r[16..47].
        mov rcx, QWORD PTR [rsp+256]
        neg r9
        add rcx, 256
        ; r[16..47] -= t
        mov r8, QWORD PTR [rcx+-128]
        sub r8, QWORD PTR [r10+-128]
        mov rax, QWORD PTR [rcx+-120]
        mov QWORD PTR [rcx+-128], r8
        sbb rax, QWORD PTR [r10+-120]
        mov r8, QWORD PTR [rcx+-112]
        mov QWORD PTR [rcx+-120], rax
        sbb r8, QWORD PTR [r10+-112]
        mov rax, QWORD PTR [rcx+-104]
        mov QWORD PTR [rcx+-112], r8
        sbb rax, QWORD PTR [r10+-104]
        mov r8, QWORD PTR [rcx+-96]
        mov QWORD PTR [rcx+-104], rax
        sbb r8, QWORD PTR [r10+-96]
        mov rax, QWORD PTR [rcx+-88]
        mov QWORD PTR [rcx+-96], r8
        sbb rax, QWORD PTR [r10+-88]
        mov r8, QWORD PTR [rcx+-80]
        mov QWORD PTR [rcx+-88], rax
        sbb r8, QWORD PTR [r10+-80]
        mov rax, QWORD PTR [rcx+-72]
        mov QWORD PTR [rcx+-80], r8
        sbb rax, QWORD PTR [r10+-72]
        mov r8, QWORD PTR [rcx+-64]
        mov QWORD PTR [rcx+-72], rax
        sbb r8, QWORD PTR [r10+-64]
        mov rax, QWORD PTR [rcx+-56]
        mov QWORD PTR [rcx+-64], r8
        sbb rax, QWORD PTR [r10+-56]
        mov r8, QWORD PTR [rcx+-48]
        mov QWORD PTR [rcx+-56], rax
        sbb r8, QWORD PTR [r10+-48]
        mov rax, QWORD PTR [rcx+-40]
        mov QWORD PTR [rcx+-48], r8
        sbb rax, QWORD PTR [r10+-40]
        mov r8, QWORD PTR [rcx+-32]
        mov QWORD PTR [rcx+-40], rax
        sbb r8, QWORD PTR [r10+-32]
        mov rax, QWORD PTR [rcx+-24]
        mov QWORD PTR [rcx+-32], r8
        sbb rax, QWORD PTR [r10+-24]
        mov r8, QWORD PTR [rcx+-16]
        mov QWORD PTR [rcx+-24], rax
        sbb r8, QWORD PTR [r10+-16]
        mov rax, QWORD PTR [rcx+-8]
        mov QWORD PTR [rcx+-16], r8
        sbb rax, QWORD PTR [r10+-8]
        mov r8, QWORD PTR [rcx]
        mov QWORD PTR [rcx+-8], rax
        sbb r8, QWORD PTR [r10]
        mov rax, QWORD PTR [rcx+8]
        mov QWORD PTR [rcx], r8
        sbb rax, QWORD PTR [r10+8]
        mov r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rcx+8], rax
        sbb r8, QWORD PTR [r10+16]
        mov rax, QWORD PTR [rcx+24]
        mov QWORD PTR [rcx+16], r8
        sbb rax, QWORD PTR [r10+24]
        mov r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rcx+24], rax
        sbb r8, QWORD PTR [r10+32]
        mov rax, QWORD PTR [rcx+40]
        mov QWORD PTR [rcx+32], r8
        sbb rax, QWORD PTR [r10+40]
        mov r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rcx+40], rax
        sbb r8, QWORD PTR [r10+48]
        mov rax, QWORD PTR [rcx+56]
        mov QWORD PTR [rcx+48], r8
        sbb rax, QWORD PTR [r10+56]
        mov r8, QWORD PTR [rcx+64]
        mov QWORD PTR [rcx+56], rax
        sbb r8, QWORD PTR [r10+64]
        mov rax, QWORD PTR [rcx+72]
        mov QWORD PTR [rcx+64], r8
        sbb rax, QWORD PTR [r10+72]
        mov r8, QWORD PTR [rcx+80]
        mov QWORD PTR [rcx+72], rax
        sbb r8, QWORD PTR [r10+80]
        mov rax, QWORD PTR [rcx+88]
        mov QWORD PTR [rcx+80], r8
        sbb rax, QWORD PTR [r10+88]
        mov r8, QWORD PTR [rcx+96]
        mov QWORD PTR [rcx+88], rax
        sbb r8, QWORD PTR [r10+96]
        mov rax, QWORD PTR [rcx+104]
        mov QWORD PTR [rcx+96], r8
        sbb rax, QWORD PTR [r10+104]
        mov r8, QWORD PTR [rcx+112]
        mov QWORD PTR [rcx+104], rax
        sbb r8, QWORD PTR [r10+112]
        mov rax, QWORD PTR [rcx+120]
        mov QWORD PTR [rcx+112], r8
        sbb rax, QWORD PTR [r10+120]
        mov QWORD PTR [rcx+120], rax
        ; r9 = earlier borrows minus the borrow of this subtraction: the net
        ; amount still owed to r[48..63].
        sbb r9, 0
        mov rcx, QWORD PTR [rsp+256]
        add rcx, 384
        ; Add in word
        ; r[48..63] += r9, rippling the carry through all 16 words.
        mov r8, QWORD PTR [rcx]
        add r8, r9
        mov rax, QWORD PTR [rcx+8]
        mov QWORD PTR [rcx], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rcx+8], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+24]
        mov QWORD PTR [rcx+16], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rcx+24], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+40]
        mov QWORD PTR [rcx+32], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rcx+40], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+56]
        mov QWORD PTR [rcx+48], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+64]
        mov QWORD PTR [rcx+56], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+72]
        mov QWORD PTR [rcx+64], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+80]
        mov QWORD PTR [rcx+72], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+88]
        mov QWORD PTR [rcx+80], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+96]
        mov QWORD PTR [rcx+88], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+104]
        mov QWORD PTR [rcx+96], r8
        adc rax, 0
        mov r8, QWORD PTR [rcx+112]
        mov QWORD PTR [rcx+104], rax
        adc r8, 0
        mov rax, QWORD PTR [rcx+120]
        mov QWORD PTR [rcx+112], r8
        adc rax, 0
        mov QWORD PTR [rcx+120], rax
        ; Put a and r back in rdx and rcx, then release the frame.
        mov rdx, QWORD PTR [rsp+264]
        mov rcx, QWORD PTR [rsp+256]
        add rsp, 272
        ret
sp_2048_sqr_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * r A single precision integer.
; * a A single precision integer.
; */
; Implementation notes for sp_2048_sqr_avx2_32:
;   * rcx (r) and rdx (a) are spilled to [rsp+256] / [rsp+264] because they
;     are reloaded around every call to sp_2048_sqr_avx2_16.
;   * [rsp .. rsp+255] is scratch: first |al - ah| (16 words), then its
;     32-word square.
;   * The fixed-length carry chains are generated with REPEAT blocks; the
;     expanded code keeps the load / store / sbb interleaving, where the
;     mov instructions in between leave the carry flag untouched.

; Subtract the 32 words at [src-128 .. src+127] from the 32 words at
; [dst-128 .. dst+127], in place, then fold the final borrow into r9.
; Clobbers rax, r8 and the flags.
SP_2048_SQR_AVX2_32_SUB32 MACRO dst, src
        mov     r8, QWORD PTR [dst+-128]
        sub     r8, QWORD PTR [src+-128]
sp2048_rep_off = -128
        REPEAT  15
        mov     rax, QWORD PTR [dst+sp2048_rep_off+8]
        mov     QWORD PTR [dst+sp2048_rep_off], r8
        sbb     rax, QWORD PTR [src+sp2048_rep_off+8]
        mov     r8, QWORD PTR [dst+sp2048_rep_off+16]
        mov     QWORD PTR [dst+sp2048_rep_off+8], rax
        sbb     r8, QWORD PTR [src+sp2048_rep_off+16]
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     rax, QWORD PTR [dst+120]
        mov     QWORD PTR [dst+112], r8
        sbb     rax, QWORD PTR [src+120]
        mov     QWORD PTR [dst+120], rax
        sbb     r9, 0
ENDM

_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_32 PROC
        sub     rsp, 272
        mov     QWORD PTR [rsp+256], rcx
        mov     QWORD PTR [rsp+264], rdx

        ; scratch = al - ah (16 words); r9 ends up 0, or -1 on borrow
        mov     r9, 0
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+128]
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
sp2048_rep_off = 0
        REPEAT  7
        mov     r8, QWORD PTR [rdx+sp2048_rep_off+8]
        mov     QWORD PTR [r10+sp2048_rep_off], rax
        sbb     r8, QWORD PTR [r11+sp2048_rep_off+8]
        mov     rax, QWORD PTR [rdx+sp2048_rep_off+16]
        mov     QWORD PTR [r10+sp2048_rep_off+8], r8
        sbb     rax, QWORD PTR [r11+sp2048_rep_off+16]
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     r8, QWORD PTR [rdx+120]
        mov     QWORD PTR [r10+112], rax
        sbb     r8, QWORD PTR [r11+120]
        mov     QWORD PTR [r10+120], r8
        sbb     r9, 0

        ; Cond Negate
        ; With r9 = -1 every word is complemented and 1 is added (two's
        ; complement negate); with r9 = 0 the words pass through unchanged.
        ; r11 carries the increment from one word to the next.
        mov     rax, QWORD PTR [r10]
        mov     r11, r9
        xor     rax, r9
        neg     r11
        sub     rax, r9
        mov     r8, QWORD PTR [r10+8]
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
        xor     r8, r9
        add     r8, r11
sp2048_rep_off = 0
        REPEAT  7
        mov     rax, QWORD PTR [r10+sp2048_rep_off+16]
        setc    r11b
        mov     QWORD PTR [r10+sp2048_rep_off+8], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+sp2048_rep_off+24]
        setc    r11b
        mov     QWORD PTR [r10+sp2048_rep_off+16], rax
        xor     r8, r9
        add     r8, r11
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     QWORD PTR [r10+120], r8

        ; scratch = (al - ah)^2, squared in place on the stack
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_2048_sqr_avx2_16
        ; r[32..63] = ah^2
        mov     rdx, QWORD PTR [rsp+264]
        mov     rcx, QWORD PTR [rsp+256]
        add     rdx, 128
        add     rcx, 256
        call    sp_2048_sqr_avx2_16
        ; r[0..31] = al^2
        mov     rdx, QWORD PTR [rsp+264]
        mov     rcx, QWORD PTR [rsp+256]
        call    sp_2048_sqr_avx2_16
        ; No argument reload is needed here: rdx is reloaded on the next
        ; line and rcx is reloaded before it is next read.

        ; scratch -= r[32..63], then scratch -= r[0..31]; r9 counts borrows
        mov     rdx, QWORD PTR [rsp+256]
        lea     r10, QWORD PTR [rsp+128]
        add     rdx, 384
        mov     r9, 0
        SP_2048_SQR_AVX2_32_SUB32 r10, rdx
        sub     rdx, 256
        SP_2048_SQR_AVX2_32_SUB32 r10, rdx

        ; r[16..47] -= scratch
        mov     rcx, QWORD PTR [rsp+256]
        neg     r9
        add     rcx, 256
        SP_2048_SQR_AVX2_32_SUB32 rcx, r10

        mov     rcx, QWORD PTR [rsp+256]
        add     rcx, 384
        ; Add in word
        ; r9 is added to r[48] and the carry is rippled through r[63].
        mov     r8, QWORD PTR [rcx]
        add     r8, r9
sp2048_rep_off = 0
        REPEAT  7
        mov     rax, QWORD PTR [rcx+sp2048_rep_off+8]
        mov     QWORD PTR [rcx+sp2048_rep_off], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+sp2048_rep_off+16]
        mov     QWORD PTR [rcx+sp2048_rep_off+8], rax
        adc     r8, 0
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     rax, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        adc     rax, 0
        mov     QWORD PTR [rcx+120], rax

        ; Leave with rdx / rcx holding the incoming a / r pointers again.
        mov     rdx, QWORD PTR [rsp+264]
        mov     rcx, QWORD PTR [rsp+256]
        add     rsp, 272
        ret
sp_2048_sqr_avx2_32 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result (16 words, in rcx).
; * b A single precision integer (16 words, in rdx).
; * returns 0 in rax when there was no final borrow and -1 when there was.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_16 PROC
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        ; Words 1..14, two per iteration, alternating r9 / r8.
sp2048_rep_off = 0
        REPEAT  7
        mov     r9, QWORD PTR [rcx+sp2048_rep_off+8]
        mov     QWORD PTR [rcx+sp2048_rep_off], r8
        sbb     r9, QWORD PTR [rdx+sp2048_rep_off+8]
        mov     r8, QWORD PTR [rcx+sp2048_rep_off+16]
        mov     QWORD PTR [rcx+sp2048_rep_off+8], r9
        sbb     r8, QWORD PTR [rdx+sp2048_rep_off+16]
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     r9, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        sbb     r9, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+120], r9
        ; rax = 0 - borrow
        sbb     rax, rax
        ret
sp_2048_sub_in_place_16 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (33 words are written, in rcx).
; * a A single precision integer (32 words, in rdx).
; * b A single precision digit (in r8).
; */

; One inner step of sp_2048_mul_d_32: multiply the word of a at byte offset
; disp by the digit in r8, add the low half into acc_lo and store it to r,
; add the high half into acc_hi, and collect the carry in acc_nx.
SP_2048_MUL_D_32_STEP MACRO disp, acc_lo, acc_hi, acc_nx
        mov     rax, r8
        xor     acc_nx, acc_nx
        mul     QWORD PTR [r9+disp]
        add     acc_lo, rax
        mov     QWORD PTR [rcx+disp], acc_lo
        adc     acc_hi, rdx
        adc     acc_nx, 0
ENDM

_text SEGMENT READONLY PARA
sp_2048_mul_d_32 PROC
        push    r12
        ; mul overwrites rdx, so a is addressed through r9
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[30] * B: the accumulator registers rotate through
        ; r11 -> r12 -> r10 with a period of three steps.
sp2048_rep_off = 8
        REPEAT  10
        SP_2048_MUL_D_32_STEP sp2048_rep_off, r11, r12, r10
        SP_2048_MUL_D_32_STEP sp2048_rep_off+8, r12, r10, r11
        SP_2048_MUL_D_32_STEP sp2048_rep_off+16, r10, r11, r12
sp2048_rep_off = sp2048_rep_off + 24
        ENDM
        ; A[31] * B
        mov     rax, r8
        mul     QWORD PTR [r9+248]
        add     r11, rax
        adc     r12, rdx
        mov     QWORD PTR [rcx+248], r11
        mov     QWORD PTR [rcx+256], r12
        pop     r12
        ret
sp_2048_mul_d_32 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; * returns 0 in rax when there was no final borrow and -1 when there was.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_16 PROC
        sub     rsp, 128
        ; [rsp .. rsp+127] = b AND m, two words per iteration
sp2048_rep_off = 0
        REPEAT  8
        mov     r10, QWORD PTR [r8+sp2048_rep_off]
        mov     r11, QWORD PTR [r8+sp2048_rep_off+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp2048_rep_off], r10
        mov     QWORD PTR [rsp+sp2048_rep_off+8], r11
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        ; r = a - (b AND m); r8 is reused as scratch from here on
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
        mov     r11, QWORD PTR [rdx+8]
        mov     r8, QWORD PTR [rsp+8]
        sbb     r11, r8
        mov     QWORD PTR [rcx], r10
sp2048_rep_off = 16
        REPEAT  7
        mov     r10, QWORD PTR [rdx+sp2048_rep_off]
        mov     r8, QWORD PTR [rsp+sp2048_rep_off]
        sbb     r10, r8
        mov     QWORD PTR [rcx+sp2048_rep_off-8], r11
        mov     r11, QWORD PTR [rdx+sp2048_rep_off+8]
        mov     r8, QWORD PTR [rsp+sp2048_rep_off+8]
        sbb     r11, r8
        mov     QWORD PTR [rcx+sp2048_rep_off], r10
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        mov     QWORD PTR [rcx+120], r11
        ; rax = 0 - borrow; taken before add rsp changes the flags
        sbb     rax, rax
        add     rsp, 128
        ret
sp_2048_cond_sub_16 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
; Register use in sp_2048_mont_reduce_16:
;   rcx       a, advanced by one word per loop iteration
;   r9        m (copied from rdx, which every mul overwrites)
;   r8        mp
;   r10       loop counter, 16 iterations
;   r13       mu = a[i] * mp
;   r15, rdi  a[i+0] and a[i+1], kept in registers between iterations
;   r11, r12  alternating carry words; r14 is scratch for a[i+k]
;   rsi       carry out of the top word, negated into the mask passed to
;             sp_2048_cond_sub_16
; r12-r15, rdi and rsi are pushed on entry and popped before returning.
; rax is left as set by sp_2048_cond_sub_16.

; One inner step of sp_2048_mont_reduce_16:
; a[i + disp/8] += m[disp/8] * mu + cin, with the new carry word in cout.
SP_2048_MONT_RED_16_STEP MACRO disp, cin, cout
        mov     rax, r13
        xor     cout, cout
        mul     QWORD PTR [r9+disp]
        mov     r14, QWORD PTR [rcx+disp]
        add     r14, rax
        adc     cout, rdx
        add     r14, cin
        mov     QWORD PTR [rcx+disp], r14
        adc     cout, 0
ENDM

_text SEGMENT READONLY PARA
sp_2048_mont_reduce_16 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 16
        mov     r10, 16
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_16_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        ; (the sum is not stored; only its carry word in r12 is used)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu
        ; (result stays in r15 as the next iteration's a[i+0])
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu
        ; (result stays in rdi as the next iteration's a[i+1])
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+14] += m[3] .. m[14] * mu, two words per iteration
sp2048_rep_off = 24
        REPEAT  6
        SP_2048_MONT_RED_16_STEP sp2048_rep_off, r12, r11
        SP_2048_MONT_RED_16_STEP sp2048_rep_off+8, r11, r12
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        ; a[i+15] += m[15] * mu
        ; rsi holds the carry left by the previous iteration; it is added
        ; into the high product word and then rebuilt for this iteration.
        mov     rax, r13
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     QWORD PTR [rcx+128], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_2048_mont_reduce_16_loop
        ; rcx now points 128 bytes past the original a
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Final conditional subtract:
        ;   sp_2048_cond_sub_16(a, a + 16 words, m, -carry)
        neg     rsi
        ; r9 must be copied to r8 before it is overwritten with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 128
        call    sp_2048_cond_sub_16
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
; Notes for sp_2048_cond_sub_avx2_16:
;   rcx = r, rdx = a, r8 = b, r9 = m.
;   pext with r9 as the selector yields the word of b unchanged when r9 is
;   all ones and 0 when r9 is zero, and it does not touch the carry flag,
;   so it can sit inside the sub/sbb borrow chain.
;   rax is returned as 0 - final borrow (0 or -1).

; One step of the borrow chain: load the word of b and of a at byte offset
; disp, mask b, store the previous result word and subtract with borrow.
; The three registers rotate from step to step.
SP_2048_COND_SUB_AVX2_16_STEP MACRO disp, b_reg, a_reg, done_reg
        mov     b_reg, QWORD PTR [r8+disp]
        mov     a_reg, QWORD PTR [rdx+disp]
        pext    b_reg, b_reg, r9
        mov     QWORD PTR [rcx+disp-8], done_reg
        sbb     a_reg, b_reg
ENDM

_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_16 PROC
        push    r12
        ; word 0 starts the borrow chain
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; words 1..15, three per iteration
sp2048_rep_off = 8
        REPEAT  5
        SP_2048_COND_SUB_AVX2_16_STEP sp2048_rep_off, r12, r11, r10
        SP_2048_COND_SUB_AVX2_16_STEP sp2048_rep_off+8, r10, r12, r11
        SP_2048_COND_SUB_AVX2_16_STEP sp2048_rep_off+16, r11, r10, r12
sp2048_rep_off = sp2048_rep_off + 24
        ENDM
        mov     QWORD PTR [rcx+120], r10
        sbb     rax, rax
        pop     r12
        ret
sp_2048_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r.
; (r = a * b)
; *
; * r A single precision integer (17 words are written, in rcx).
; * a A single precision integer (16 words, in rdx).
; * b A single precision digit (in r8).
; */

; One inner step of sp_2048_mul_d_16: multiply the word of a at byte offset
; disp by the digit in r8, add the low half into acc_lo and store it to r,
; add the high half into acc_hi, and collect the carry in acc_nx.
SP_2048_MUL_D_16_STEP MACRO disp, acc_lo, acc_hi, acc_nx
        mov     rax, r8
        xor     acc_nx, acc_nx
        mul     QWORD PTR [r9+disp]
        add     acc_lo, rax
        mov     QWORD PTR [rcx+disp], acc_lo
        adc     acc_hi, rdx
        adc     acc_nx, 0
ENDM

_text SEGMENT READONLY PARA
sp_2048_mul_d_16 PROC
        push    r12
        ; mul overwrites rdx, so a is addressed through r9
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[12] * B: the accumulator registers rotate through
        ; r11 -> r12 -> r10 with a period of three steps.
sp2048_rep_off = 8
        REPEAT  4
        SP_2048_MUL_D_16_STEP sp2048_rep_off, r11, r12, r10
        SP_2048_MUL_D_16_STEP sp2048_rep_off+8, r12, r10, r11
        SP_2048_MUL_D_16_STEP sp2048_rep_off+16, r10, r11, r12
sp2048_rep_off = sp2048_rep_off + 24
        ENDM
        ; A[13] * B
        SP_2048_MUL_D_16_STEP 104, r11, r12, r10
        ; A[14] * B
        SP_2048_MUL_D_16_STEP 112, r12, r10, r11
        ; A[15] * B
        mov     rax, r8
        mul     QWORD PTR [r9+120]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+120], r10
        mov     QWORD PTR [rcx+128], r11
        pop     r12
        ret
sp_2048_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (17 words are written, in rcx).
; * a A single precision integer (16 words, in rdx).
; * b A single precision digit (in r8).
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_16 PROC
        push    r12
        push    r13
        ; mulx takes its implicit multiplicand from rdx, so a moves to rax
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        ; r13 stays 0 as a zero source; the xor also clears CF and OF
        ; before the adcx / adox chains start.
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[14] * B, two words per iteration; r12 and r11 swap
        ; roles as "current word" and "next word" on every step.
sp2048_rep_off = 8
        REPEAT  7
        mulx    r10, r9, QWORD PTR [rax+sp2048_rep_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp2048_rep_off], r12
        mulx    r10, r9, QWORD PTR [rax+sp2048_rep_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp2048_rep_off+8], r11
sp2048_rep_off = sp2048_rep_off + 16
        ENDM
        ; A[15] * B
        mulx    r10, r9, QWORD PTR [rax+120]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; fold the last CF-chain carry into the top word
        adcx    r11, r13
        mov     QWORD PTR [rcx+120], r12
        mov     QWORD PTR [rcx+128], r11
        pop     r13
        pop     r12
        ret
sp_2048_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide (in rcx).
; * d0 The low order half of the number to divide (in rdx).
; * div The divisor (in r8).
; * returns the quotient of the division in rax; the remainder is left in rdx.
; *
; * The div instruction raises a divide error when div is 0 or when the
; * quotient does not fit in 64 bits (d1 >= div), so callers must pass
; * d1 < div.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_16 PROC
        ; div expects the dividend in rdx:rax. d0 arrives in rdx, so it is
        ; moved to rax before rdx is overwritten with d1.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_2048_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer (16 words, in rcx).
; * b A single precision integer (16 words, in rdx).
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively. The values produced in rax are -1, 0 and 1.
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_16 PROC
        push    r12
        ; r9 = 0, r10 = 1: constant sources for the conditional moves
        ; r8 = all-ones mask until the first differing word, then 0
        ; rax = running result, starting at -1
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        ; All 16 words are visited, most significant first. Once r8 is 0
        ; both operands are masked to 0, so later words cannot change rax.
sp2048_rep_off = 120
        REPEAT  16
        mov     r11, QWORD PTR [rcx+sp2048_rep_off]
        mov     r12, QWORD PTR [rdx+sp2048_rep_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
sp2048_rep_off = sp2048_rep_off - 8
        ENDM
        ; If no word differed r8 is still -1 and rax is still -1, giving 0.
        xor     rax, r8
        pop     r12
        ret
sp_2048_cmp_16 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_16 PROC
        sub rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov rax, 1
        movd xmm10, r8
        movd xmm11, rax
        pxor xmm13, xmm13
        pshufd xmm11, xmm11, 0
        pshufd xmm10, xmm10, 0
        ; START: 0-7
        pxor xmm13, xmm13
        pxor xmm4, xmm4
        pxor xmm5, xmm5
        pxor xmm6, xmm6
        pxor xmm7, xmm7
        ; ENTRY: 0
        mov r9, QWORD PTR [rdx]
        movdqu xmm12, xmm13
        pcmpeqd xmm12, xmm10
        movdqu xmm0, OWORD PTR [r9]
        movdqu xmm1, OWORD PTR
[r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, 
xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu 
xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, 
QWORD PTR [rdx+232] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 0-7 ; START: 8-15 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov 
r9, QWORD PTR [rdx+16] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, 
xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 64 movdqu xmm12, xmm13 
pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand 
xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, 
OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por 
xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 ; END: 8-15 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_2048_get_from_table_16 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 2048 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. 
; */ _text SEGMENT READONLY PARA sp_2048_mont_reduce_avx2_16 PROC push r12 push r13 push r14 push r15 push rdi push rsi push rbx push rbp mov r9, rcx mov r10, rdx xor rbp, rbp ; i = 16 mov r11, 16 mov r14, QWORD PTR [r9] mov r15, QWORD PTR [r9+8] mov rdi, QWORD PTR [r9+16] mov rsi, QWORD PTR [r9+24] add r9, 64 xor rbp, rbp L_2048_mont_reduce_avx2_16_loop: ; mu = a[i] * mp mov rdx, r14 mov r12, r14 imul rdx, r8 xor rbx, rbx ; a[i+0] += m[0] * mu mulx rcx, rax, QWORD PTR [r10] mov r14, r15 adcx r12, rax adox r14, rcx ; a[i+1] += m[1] * mu mulx rcx, rax, QWORD PTR [r10+8] mov r15, rdi adcx r14, rax adox r15, rcx ; a[i+2] += m[2] * mu mulx rcx, rax, QWORD PTR [r10+16] mov rdi, rsi adcx r15, rax adox rdi, rcx ; a[i+3] += m[3] * mu mulx rcx, rax, QWORD PTR [r10+24] mov rsi, QWORD PTR [r9+-32] adcx rdi, rax adox rsi, rcx ; a[i+4] += m[4] * mu mulx rcx, rax, QWORD PTR [r10+32] mov r13, QWORD PTR [r9+-24] adcx rsi, rax adox r13, rcx ; a[i+5] += m[5] * mu mulx rcx, rax, QWORD PTR [r10+40] mov r12, QWORD PTR [r9+-16] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+-24], r13 ; a[i+6] += m[6] * mu mulx rcx, rax, QWORD PTR [r10+48] mov r13, QWORD PTR [r9+-8] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+-16], r12 ; a[i+7] += m[7] * mu mulx rcx, rax, QWORD PTR [r10+56] mov r12, QWORD PTR [r9] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+-8], r13 ; a[i+8] += m[8] * mu mulx rcx, rax, QWORD PTR [r10+64] mov r13, QWORD PTR [r9+8] adcx r12, rax adox r13, rcx mov QWORD PTR [r9], r12 ; a[i+9] += m[9] * mu mulx rcx, rax, QWORD PTR [r10+72] mov r12, QWORD PTR [r9+16] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+8], r13 ; a[i+10] += m[10] * mu mulx rcx, rax, QWORD PTR [r10+80] mov r13, QWORD PTR [r9+24] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+16], r12 ; a[i+11] += m[11] * mu mulx rcx, rax, QWORD PTR [r10+88] mov r12, QWORD PTR [r9+32] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+24], r13 ; a[i+12] += m[12] * mu mulx rcx, rax, QWORD PTR [r10+96] mov r13, QWORD PTR [r9+40] adcx r12, 
rax adox r13, rcx mov QWORD PTR [r9+32], r12 ; a[i+13] += m[13] * mu mulx rcx, rax, QWORD PTR [r10+104] mov r12, QWORD PTR [r9+48] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+40], r13 ; a[i+14] += m[14] * mu mulx rcx, rax, QWORD PTR [r10+112] mov r13, QWORD PTR [r9+56] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+48], r12 ; a[i+15] += m[15] * mu mulx rcx, rax, QWORD PTR [r10+120] mov r12, QWORD PTR [r9+64] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+56], r13 adcx r12, rbp mov rbp, rbx mov QWORD PTR [r9+64], r12 adox rbp, rbx adcx rbp, rbx ; mu = a[i] * mp mov rdx, r14 mov r12, r14 imul rdx, r8 xor rbx, rbx ; a[i+0] += m[0] * mu mulx rcx, rax, QWORD PTR [r10] mov r14, r15 adcx r12, rax adox r14, rcx ; a[i+1] += m[1] * mu mulx rcx, rax, QWORD PTR [r10+8] mov r15, rdi adcx r14, rax adox r15, rcx ; a[i+2] += m[2] * mu mulx rcx, rax, QWORD PTR [r10+16] mov rdi, rsi adcx r15, rax adox rdi, rcx ; a[i+3] += m[3] * mu mulx rcx, rax, QWORD PTR [r10+24] mov rsi, QWORD PTR [r9+-24] adcx rdi, rax adox rsi, rcx ; a[i+4] += m[4] * mu mulx rcx, rax, QWORD PTR [r10+32] mov r13, QWORD PTR [r9+-16] adcx rsi, rax adox r13, rcx ; a[i+5] += m[5] * mu mulx rcx, rax, QWORD PTR [r10+40] mov r12, QWORD PTR [r9+-8] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+-16], r13 ; a[i+6] += m[6] * mu mulx rcx, rax, QWORD PTR [r10+48] mov r13, QWORD PTR [r9] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+-8], r12 ; a[i+7] += m[7] * mu mulx rcx, rax, QWORD PTR [r10+56] mov r12, QWORD PTR [r9+8] adcx r13, rax adox r12, rcx mov QWORD PTR [r9], r13 ; a[i+8] += m[8] * mu mulx rcx, rax, QWORD PTR [r10+64] mov r13, QWORD PTR [r9+16] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+8], r12 ; a[i+9] += m[9] * mu mulx rcx, rax, QWORD PTR [r10+72] mov r12, QWORD PTR [r9+24] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+16], r13 ; a[i+10] += m[10] * mu mulx rcx, rax, QWORD PTR [r10+80] mov r13, QWORD PTR [r9+32] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+24], r12 ; a[i+11] += m[11] * mu mulx rcx, rax, QWORD 
PTR [r10+88] mov r12, QWORD PTR [r9+40] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+32], r13 ; a[i+12] += m[12] * mu mulx rcx, rax, QWORD PTR [r10+96] mov r13, QWORD PTR [r9+48] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+40], r12 ; a[i+13] += m[13] * mu mulx rcx, rax, QWORD PTR [r10+104] mov r12, QWORD PTR [r9+56] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+48], r13 ; a[i+14] += m[14] * mu mulx rcx, rax, QWORD PTR [r10+112] mov r13, QWORD PTR [r9+64] adcx r12, rax adox r13, rcx mov QWORD PTR [r9+56], r12 ; a[i+15] += m[15] * mu mulx rcx, rax, QWORD PTR [r10+120] mov r12, QWORD PTR [r9+72] adcx r13, rax adox r12, rcx mov QWORD PTR [r9+64], r13 adcx r12, rbp mov rbp, rbx mov QWORD PTR [r9+72], r12 adox rbp, rbx adcx rbp, rbx ; a += 2 add r9, 16 ; i -= 2 sub r11, 2 jnz L_2048_mont_reduce_avx2_16_loop sub r9, 64 neg rbp mov r8, r9 sub r9, 128 mov rcx, QWORD PTR [r10] mov rdx, r14 pext rcx, rcx, rbp sub rdx, rcx mov rcx, QWORD PTR [r10+8] mov rax, r15 pext rcx, rcx, rbp mov QWORD PTR [r9], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+16] mov rcx, rdi pext rdx, rdx, rbp mov QWORD PTR [r9+8], rax sbb rcx, rdx mov rax, QWORD PTR [r10+24] mov rdx, rsi pext rax, rax, rbp mov QWORD PTR [r9+16], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+32] mov rax, QWORD PTR [r8+32] pext rcx, rcx, rbp mov QWORD PTR [r9+24], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+40] mov rcx, QWORD PTR [r8+40] pext rdx, rdx, rbp mov QWORD PTR [r9+32], rax sbb rcx, rdx mov rax, QWORD PTR [r10+48] mov rdx, QWORD PTR [r8+48] pext rax, rax, rbp mov QWORD PTR [r9+40], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+56] mov rax, QWORD PTR [r8+56] pext rcx, rcx, rbp mov QWORD PTR [r9+48], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+64] mov rcx, QWORD PTR [r8+64] pext rdx, rdx, rbp mov QWORD PTR [r9+56], rax sbb rcx, rdx mov rax, QWORD PTR [r10+72] mov rdx, QWORD PTR [r8+72] pext rax, rax, rbp mov QWORD PTR [r9+64], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+80] mov rax, QWORD PTR [r8+80] pext rcx, rcx, rbp mov QWORD PTR 
[r9+72], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+88]
        mov     rcx, QWORD PTR [r8+88]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+80], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+96]
        mov     rdx, QWORD PTR [r8+96]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+88], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+104]
        mov     rax, QWORD PTR [r8+104]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+96], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+112]
        mov     rcx, QWORD PTR [r8+112]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+104], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+120]
        mov     rdx, QWORD PTR [r8+120]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+112], rcx
        sbb     rdx, rax
        mov     QWORD PTR [r9+120], rdx
        pop     rbp
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; Every instruction below the prologue is AVX/AVX2, so the routine is only
; assembled when the assembler is known to support AVX2 (HAVE_INTEL_AVX2 is
; left undefined at the top of the file for old MASM versions).
IFDEF HAVE_INTEL_AVX2
; /* Copy one entry out of a table of 32 entries.
; * Every entry is loaded and masked, so the sequence of memory accesses
; * does not depend on idx.
; *
; * r      Destination; 16 words (128 bytes) are written.       (rcx)
; * table  Array of 32 pointers, each to a 128-byte entry.      (rdx)
; * idx    Index of the entry to copy, compared as 32 bits.     (r8)
; *
; * If idx matches none of 0..31 the destination is filled with zeros.
; */
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_16 PROC
        ; xmm6-xmm15 are callee-saved in the Microsoft x64 ABI; spill the
        ; same range the original code spilled (xmm6-xmm13).
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13

        ; ymm10 = idx in every 32-bit lane, ymm11 = 1 in every 32-bit lane.
        ; vpermd with an all-zero index vector replicates lane 0.
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        vpxor   ymm13, ymm13, ymm13
        vpermd  ymm10, ymm13, ymm10
        vpermd  ymm11, ymm13, ymm11

        ; ymm13 is still zero here and now serves as the running entry
        ; counter; ymm4-ymm7 accumulate the selected 128 bytes.
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
        vpxor   ymm6, ymm6, ymm6
        vpxor   ymm7, ymm7, ymm7

        ; One pass per table entry: build an all-ones mask when
        ; counter == idx (all-zero otherwise), AND it into the entry and
        ; OR the result into the accumulators, then bump the counter.
SP2048_GFT16_OFF = 0
        REPEAT  32
        mov     r9, QWORD PTR [rdx+SP2048_GFT16_OFF]
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vmovdqu ymm2, YMMWORD PTR [r9+64]
        vmovdqu ymm3, YMMWORD PTR [r9+96]
        vpand   ymm0, ymm0, ymm12
        vpand   ymm1, ymm1, ymm12
        vpand   ymm2, ymm2, ymm12
        vpand   ymm3, ymm3, ymm12
        vpor    ymm4, ymm4, ymm0
        vpor    ymm5, ymm5, ymm1
        vpor    ymm6, ymm6, ymm2
        vpor    ymm7, ymm7, ymm3
        vpaddd  ymm13, ymm13, ymm11
SP2048_GFT16_OFF = SP2048_GFT16_OFF + 8
        ENDM

        vmovdqu YMMWORD PTR [rcx], ymm4
        vmovdqu YMMWORD PTR [rcx+32], ymm5
        vmovdqu YMMWORD PTR [rcx+64], ymm6
        vmovdqu YMMWORD PTR [rcx+96], ymm7

        ; Clear the upper YMM halves so callers running legacy-SSE code do
        ; not pay AVX/SSE transition penalties.  The low 128 bits are not
        ; affected, and the saved xmm registers are reloaded below anyway.
        vzeroupper

        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        add     rsp, 128
        ret
sp_2048_get_from_table_avx2_16 ENDP
_text ENDS
ENDIF
ENDIF
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not copying.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns in rax: 0 when the 32-word subtraction produced no borrow,
; * -1 when it did.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_32 PROC
        ; 256 bytes of stack scratch hold the 32 masked words of b.
        sub     rsp, 256

        ; Pass 1: scratch[i] = b[i] & m, two words per step.
SP2048_CSUB32_OFF = 0
        REPEAT  16
        mov     r10, QWORD PTR [r8+SP2048_CSUB32_OFF]
        mov     r11, QWORD PTR [r8+SP2048_CSUB32_OFF+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+SP2048_CSUB32_OFF], r10
        mov     QWORD PTR [rsp+SP2048_CSUB32_OFF+8], r11
SP2048_CSUB32_OFF = SP2048_CSUB32_OFF + 16
        ENDM

        ; Pass 2: r = a - scratch with a single borrow chain.  r8 is no
        ; longer needed as the b pointer and is reused as a temporary.
        ; Each store of word i-1 is delayed until after the sbb of word i;
        ; mov leaves the carry flag untouched, so the chain is unbroken.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8

        ; Words 1..30, handled as (odd, even) pairs alternating r11/r10.
SP2048_CSUB32_OFF = 8
        REPEAT  15
        mov     r11, QWORD PTR [rdx+SP2048_CSUB32_OFF]
        mov     r8, QWORD PTR [rsp+SP2048_CSUB32_OFF]
        sbb     r11, r8
        mov     QWORD PTR [rcx+SP2048_CSUB32_OFF-8], r10
        mov     r10, QWORD PTR [rdx+SP2048_CSUB32_OFF+8]
        mov     r8, QWORD PTR [rsp+SP2048_CSUB32_OFF+8]
        sbb     r10, r8
        mov     QWORD PTR [rcx+SP2048_CSUB32_OFF], r11
SP2048_CSUB32_OFF = SP2048_CSUB32_OFF + 16
        ENDM

        ; Word 31 and the two stores still pending.
        mov     r11, QWORD PTR [rdx+248]
        mov     r8, QWORD PTR [rsp+248]
        sbb     r11, r8
        mov     QWORD PTR [rcx+240], r10
        mov     QWORD PTR [rcx+248], r11

        ; rax = 0 - borrow: 0 or all ones.
        sbb     rax, rax
        add     rsp, 256
        ret
sp_2048_cond_sub_32 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * The loop runs 32 times; each pass adds m * mu at the current word of a
; * and advances rcx by one word.  The carry out of the top word is kept in
; * rsi and decides whether m is subtracted once at the end.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_32 PROC
        ; rdi, rsi and r12-r15 are callee-saved in the Microsoft x64 ABI.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; mul overwrites rdx, so keep the modulus pointer in r9.
        mov     r9, rdx
        ; rsi = carry out of the top word, carried across passes.
        xor     rsi, rsi
        ; i = 32
        mov     r10, 32
        ; The two lowest live words of a are cached in r15 and rdi.
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_32_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu   (only the carry into the next word is kept)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu   (result becomes the cached a[i] of the next pass)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu   (result becomes the cached a[i+1] of the next pass)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+30] += m[3..30] * mu, in (odd, even) pairs.
        ; The running carry alternates between r12 and r11.
SP2048_MRED32_OFF = 24
        REPEAT  14
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+SP2048_MRED32_OFF]
        mov     r14, QWORD PTR [rcx+SP2048_MRED32_OFF]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+SP2048_MRED32_OFF], r14
        adc     r11, 0
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+SP2048_MRED32_OFF+8]
        mov     r14, QWORD PTR [rcx+SP2048_MRED32_OFF+8]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+SP2048_MRED32_OFF+8], r14
        adc     r12, 0
SP2048_MRED32_OFF = SP2048_MRED32_OFF + 16
        ENDM
        ; a[i+31] += m[31] * mu, folding in the carry saved in rsi and
        ; propagating into a[i+32].
        mov     rax, r13
        mul     QWORD PTR [r9+248]
        mov     r14, QWORD PTR [rcx+248]
        add     r12, rax
        adc     rdx, rsi
        ; mov (not xor) on purpose: the carry flag from the adc above must
        ; survive into the next adc.
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+248], r14
        adc     QWORD PTR [rcx+256], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_2048_mont_reduce_32_loop
        ; Write back the two cached words; rcx now points 256 bytes past
        ; the original a.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; rsi: 0 -> 0, 1 -> -1 (mask for the conditional subtract).
        neg     rsi
        ; sp_2048_cond_sub_32(r = original a, a = upper half, b = m, m = mask).
        ; r8 must be loaded from r9 before r9 is overwritten with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 256
        ; After six pushes rsp is 8 modulo 16 (for an ABI-conforming
        ; caller); 40 bytes = 32 bytes of shadow space + 8 bytes to restore
        ; 16-byte alignment at the call.
        sub     rsp, 40
        call    sp_2048_cond_sub_32
        add     rsp, 40
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_32 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.  32 words are processed.
; * Returns in rax: 0 when there was no borrow out of the top word,
; * -1 when there was.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_32 PROC
        ; Word 0 starts the borrow chain.
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        ; Words 1..30 in (odd, even) pairs alternating r10/r9.  The store
        ; of word i-1 sits between the load and the sbb of word i; mov does
        ; not change the carry flag.
SP2048_SUB32_OFF = 8
        REPEAT  15
        mov     r10, QWORD PTR [rdx+SP2048_SUB32_OFF]
        mov     QWORD PTR [rcx+SP2048_SUB32_OFF-8], r9
        sbb     r10, QWORD PTR [r8+SP2048_SUB32_OFF]
        mov     r9, QWORD PTR [rdx+SP2048_SUB32_OFF+8]
        mov     QWORD PTR [rcx+SP2048_SUB32_OFF], r10
        sbb     r9, QWORD PTR [r8+SP2048_SUB32_OFF+8]
SP2048_SUB32_OFF = SP2048_SUB32_OFF + 16
        ENDM
        ; Word 31 and the final store.
        mov     r10, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r9
        sbb     r10, QWORD PTR [r8+248]
        mov     QWORD PTR [rcx+248], r10
        ; rax = 0 - borrow.
        sbb     rax, rax
        ret
sp_2048_sub_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * 32 words of a are read and 33 words of r are written.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_32 PROC
        push    r12
        push    r13
        ; mulx takes its implicit multiplicand from rdx, so a moves to rax
        ; and the digit b moves into rdx.
        mov     rax, rdx
        mov     rdx, r8
        ; r13 = 0; the xor also clears CF and OF, the two carry chains used
        ; by adcx and adox below (mulx and mov do not modify flags).
        xor     r13, r13
        ; A[0] * B
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[30] * B in (odd, even) pairs.  The pending high
        ; word alternates between r12 and r11: adcx adds the new low half
        ; into it, adox starts the next pending word from the new high half.
SP2048_MULD32_OFF = 8
        REPEAT  15
        mulx    r10, r9, QWORD PTR [rax+SP2048_MULD32_OFF]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+SP2048_MULD32_OFF], r12
        mulx    r10, r9, QWORD PTR [rax+SP2048_MULD32_OFF+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+SP2048_MULD32_OFF+8], r11
SP2048_MULD32_OFF = SP2048_MULD32_OFF + 16
        ENDM
        ; A[31] * B, then fold the last CF into the top word.
        mulx    r10, r9, QWORD PTR [rax+248]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+248], r12
        mov     QWORD PTR [rcx+256], r11
        pop     r13
        pop     r12
        ret
sp_2048_mul_d_avx2_32 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the result of the division.
; *
; * Registers on entry: rcx = d1, rdx = d0, r8 = div.
; * "div r8" divides rdx:rax by r8; the quotient is left in rax and the
; * remainder in rdx is not used.
; * NOTE(review): div faults when the quotient does not fit in 64 bits
; * (d1 >= div); presumably callers guarantee d1 < div - confirm.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_32 PROC
        ; rdx must hold the high half for div, so d0 is moved out first.
        mov     r9, rdx
        mov     rax, r9
        mov     rdx, rcx
        div     r8
        ret
div_2048_word_asm_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 to not subtract (r is then a copy of a).
; *
; * r  A single precision number representing condition subtract result.
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Each word of b is passed through pext with m as the mask: an all-ones
; * mask returns the word unchanged and a zero mask returns 0.
; * The first limb uses sub and the remaining 31 use sbb; mov and pext do
; * not modify the flags, so the borrow propagates across all 32 limbs.
; * r10, r11 and r12 rotate between "word of b", "word of a" and
; * "previous result waiting to be stored".
; * Returns rax = 0 when there is no final borrow and -1 when there is.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_32 PROC
        push    r12
        ; limb 0
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; limb 1
        mov     r12, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [rdx+8]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx], r10
        sbb     r11, r12
        ; limb 2
        mov     r10, QWORD PTR [r8+16]
        mov     r12, QWORD PTR [rdx+16]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+8], r11
        sbb     r12, r10
        ; limb 3
        mov     r11, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+24]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+16], r12
        sbb     r10, r11
        ; limb 4
        mov     r12, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [rdx+32]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+24], r10
        sbb     r11, r12
        ; limb 5
        mov     r10, QWORD PTR [r8+40]
        mov     r12, QWORD PTR [rdx+40]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+32], r11
        sbb     r12, r10
        ; limb 6
        mov     r11, QWORD PTR [r8+48]
        mov     r10, QWORD PTR [rdx+48]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+40], r12
        sbb     r10, r11
        ; limb 7
        mov     r12, QWORD PTR [r8+56]
        mov     r11, QWORD PTR [rdx+56]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+48], r10
        sbb     r11, r12
        ; limb 8
        mov     r10, QWORD PTR [r8+64]
        mov     r12, QWORD PTR [rdx+64]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+56], r11
        sbb     r12, r10
        ; limb 9
        mov     r11, QWORD PTR [r8+72]
        mov     r10, QWORD PTR [rdx+72]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+64], r12
        sbb     r10, r11
        ; limb 10
        mov     r12, QWORD PTR [r8+80]
        mov     r11, QWORD PTR [rdx+80]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+72], r10
        sbb     r11, r12
        ; limb 11
        mov     r10, QWORD PTR [r8+88]
        mov     r12, QWORD PTR [rdx+88]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+80], r11
        sbb     r12, r10
        ; limb 12
        mov     r11, QWORD PTR [r8+96]
        mov     r10, QWORD PTR [rdx+96]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+88], r12
        sbb     r10, r11
        ; limb 13
        mov     r12, QWORD PTR [r8+104]
        mov     r11, QWORD PTR [rdx+104]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+96], r10
        sbb     r11, r12
        ; limb 14
        mov     r10, QWORD PTR [r8+112]
        mov     r12, QWORD PTR [rdx+112]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+104], r11
        sbb     r12, r10
        ; limb 15
        mov     r11, QWORD PTR [r8+120]
        mov     r10, QWORD PTR [rdx+120]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+112], r12
        sbb     r10, r11
        ; limb 16
        mov     r12, QWORD PTR [r8+128]
        mov     r11, QWORD PTR [rdx+128]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+120], r10
        sbb     r11, r12
        ; limb 17
        mov     r10, QWORD PTR [r8+136]
        mov     r12, QWORD PTR [rdx+136]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+128], r11
        sbb     r12, r10
        ; limb 18
        mov     r11, QWORD PTR [r8+144]
        mov     r10, QWORD PTR [rdx+144]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+136], r12
        sbb     r10, r11
        ; limb 19
        mov     r12, QWORD PTR [r8+152]
        mov     r11, QWORD PTR [rdx+152]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+144], r10
        sbb     r11, r12
        ; limb 20
        mov     r10, QWORD PTR [r8+160]
        mov     r12, QWORD PTR [rdx+160]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+152], r11
        sbb     r12, r10
        ; limb 21
        mov     r11, QWORD PTR [r8+168]
        mov     r10, QWORD PTR [rdx+168]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+160], r12
        sbb     r10, r11
        ; limb 22
        mov     r12, QWORD PTR [r8+176]
        mov     r11, QWORD PTR [rdx+176]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+168], r10
        sbb     r11, r12
        ; limb 23
        mov     r10, QWORD PTR [r8+184]
        mov     r12, QWORD PTR [rdx+184]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+176], r11
        sbb     r12, r10
        ; limb 24
        mov     r11, QWORD PTR [r8+192]
        mov     r10, QWORD PTR [rdx+192]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+184], r12
        sbb     r10, r11
        ; limb 25
        mov     r12, QWORD PTR [r8+200]
        mov     r11, QWORD PTR [rdx+200]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+192], r10
        sbb     r11, r12
        ; limb 26
        mov     r10, QWORD PTR [r8+208]
        mov     r12, QWORD PTR [rdx+208]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+200], r11
        sbb     r12, r10
        ; limb 27
        mov     r11, QWORD PTR [r8+216]
        mov     r10, QWORD PTR [rdx+216]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+208], r12
        sbb     r10, r11
        ; limb 28
        mov     r12, QWORD PTR [r8+224]
        mov     r11, QWORD PTR [rdx+224]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+216], r10
        sbb     r11, r12
        ; limb 29
        mov     r10, QWORD PTR [r8+232]
        mov     r12, QWORD PTR [rdx+232]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+224], r11
        sbb     r12, r10
        ; limb 30
        mov     r11, QWORD PTR [r8+240]
        mov     r10, QWORD PTR [rdx+240]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+232], r12
        sbb     r10, r11
        ; limb 31
        mov     r12, QWORD PTR [r8+248]
        mov     r11, QWORD PTR [rdx+248]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+240], r10
        sbb     r11, r12
        mov     QWORD PTR [rcx+248], r11
        ; rax = 0 - borrow: 0 when no borrow, -1 when the subtract borrowed.
        sbb     rax, rax
        pop     r12
        ret
sp_2048_cond_sub_avx2_32 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a  A single precision integer.
; * b  A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; *
; * Registers on entry: rcx = a, rdx = b. All 32 limbs are always read,
; * from the most significant ([+248]) down to the least ([+0]), and there
; * are no branches.
; *   r8   mask: -1 while every limb so far was equal, 0 after the first
; *        difference (both limbs are ANDed with it, so later limbs compare
; *        as equal and cannot change the result).
; *   r9   constant 0, r10 constant 1.
; *   rax  starts at -1; set to 1 when a's limb is above b's, and to r8
; *        (still -1 at that point) when it is below.
; * The final "xor rax, r8" turns the untouched -1 into 0 when all limbs
; * matched and leaves +1 / -1 unchanged otherwise (r8 is then 0).
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_32 PROC
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        ; limb 31
        mov     r11, QWORD PTR [rcx+248]
        mov     r12, QWORD PTR [rdx+248]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 30
        mov     r11, QWORD PTR [rcx+240]
        mov     r12, QWORD PTR [rdx+240]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 29
        mov     r11, QWORD PTR [rcx+232]
        mov     r12, QWORD PTR [rdx+232]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 28
        mov     r11, QWORD PTR [rcx+224]
        mov     r12, QWORD PTR [rdx+224]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 27
        mov     r11, QWORD PTR [rcx+216]
        mov     r12, QWORD PTR [rdx+216]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 26
        mov     r11, QWORD PTR [rcx+208]
        mov     r12, QWORD PTR [rdx+208]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 25
        mov     r11, QWORD PTR [rcx+200]
        mov     r12, QWORD PTR [rdx+200]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 24
        mov     r11, QWORD PTR [rcx+192]
        mov     r12, QWORD PTR [rdx+192]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 23
        mov     r11, QWORD PTR [rcx+184]
        mov     r12, QWORD PTR [rdx+184]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 22
        mov     r11, QWORD PTR [rcx+176]
        mov     r12, QWORD PTR [rdx+176]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 21
        mov     r11, QWORD PTR [rcx+168]
        mov     r12, QWORD PTR [rdx+168]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 20
        mov     r11, QWORD PTR [rcx+160]
        mov     r12, QWORD PTR [rdx+160]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 19
        mov     r11, QWORD PTR [rcx+152]
        mov     r12, QWORD PTR [rdx+152]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 18
        mov     r11, QWORD PTR [rcx+144]
        mov     r12, QWORD PTR [rdx+144]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 17
        mov     r11, QWORD PTR [rcx+136]
        mov     r12, QWORD PTR [rdx+136]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 16
        mov     r11, QWORD PTR [rcx+128]
        mov     r12, QWORD PTR [rdx+128]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 15
        mov     r11, QWORD PTR [rcx+120]
        mov     r12, QWORD PTR [rdx+120]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 14
        mov     r11, QWORD PTR [rcx+112]
        mov     r12, QWORD PTR [rdx+112]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 13
        mov     r11, QWORD PTR [rcx+104]
        mov     r12, QWORD PTR [rdx+104]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 12
        mov     r11, QWORD PTR [rcx+96]
        mov     r12, QWORD PTR [rdx+96]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 11
        mov     r11, QWORD PTR [rcx+88]
        mov     r12, QWORD PTR [rdx+88]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 10
        mov     r11, QWORD PTR [rcx+80]
        mov     r12, QWORD PTR [rdx+80]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 9
        mov     r11, QWORD PTR [rcx+72]
        mov     r12, QWORD PTR [rdx+72]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 8
        mov     r11, QWORD PTR [rcx+64]
        mov     r12, QWORD PTR [rdx+64]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 7
        mov     r11, QWORD PTR [rcx+56]
        mov     r12, QWORD PTR [rdx+56]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 6
        mov     r11, QWORD PTR [rcx+48]
        mov     r12, QWORD PTR [rdx+48]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 5
        mov     r11, QWORD PTR [rcx+40]
        mov     r12, QWORD PTR [rdx+40]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 4
        mov     r11, QWORD PTR [rcx+32]
        mov     r12, QWORD PTR [rdx+32]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 3
        mov     r11, QWORD PTR [rcx+24]
        mov     r12, QWORD PTR [rdx+24]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 2
        mov     r11, QWORD PTR [rcx+16]
        mov     r12, QWORD PTR [rdx+16]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 1
        mov     r11, QWORD PTR [rcx+8]
        mov     r12, QWORD PTR [rdx+8]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; limb 0
        mov     r11, QWORD PTR [rcx]
        mov     r12, QWORD PTR [rdx]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; All limbs equal: rax = -1 and r8 = -1, so the result becomes 0.
        xor     rax, r8
        pop     r12
        ret
sp_2048_cmp_32 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_32 PROC
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        pxor    xmm13, xmm13
        pshufd  xmm11, xmm11, 0
        pshufd  xmm10, xmm10, 0
        ; START: 0-7
        pxor    xmm13, xmm13
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        pxor    xmm6, xmm6
        pxor    xmm7, xmm7
        ; ENTRY: 0
        mov     r9, QWORD PTR [rdx]
        movdqu  xmm12, xmm13
        pcmpeqd xmm12, xmm10
        movdqu  xmm0, OWORD PTR [r9]
        movdqu  xmm1, OWORD PTR [r9+16]
        movdqu  xmm2, OWORD PTR [r9+32]
        movdqu  xmm3, OWORD PTR [r9+48]
        pand    xmm0, xmm12
        pand    xmm1, xmm12
        pand    xmm2, xmm12
pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] 
movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 
movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd 
xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand 
xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu 
xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 36 mov r9, QWORD PTR [rdx+288] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 38 mov r9, QWORD PTR [rdx+304] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 
por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 48 mov r9, QWORD PTR [rdx+384] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR 
[r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 
58 mov r9, QWORD PTR [rdx+464] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 59 mov r9, QWORD PTR [rdx+472] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, 
xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 0-7 ; START: 8-15 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, 
xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 
movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, 
xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu 
xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 
por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, 
OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 36 mov r9, QWORD PTR [rdx+288] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; 
ENTRY: 38 mov r9, QWORD PTR [rdx+304] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD 
PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 48 mov r9, QWORD PTR [rdx+384] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] 
add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand 
xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 58 mov r9, QWORD PTR [rdx+464] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 59 mov r9, QWORD PTR [rdx+472] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] add r9, 64 movdqu xmm12, xmm13 
pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 8-15 ; START: 16-23 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR 
[rdx+8] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand 
xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 movdqu xmm12, xmm13 
pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 
pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 
movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por 
xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR 
[r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 36 mov r9, QWORD PTR [rdx+288] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 38 mov r9, QWORD PTR [rdx+304] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, 
xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD 
PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 48 mov r9, QWORD PTR [rdx+384] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por 
xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, 
OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 58 mov r9, QWORD PTR [rdx+464] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 59 mov r9, QWORD PTR [rdx+472] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, 
xmm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 16-23 ; START: 24-31 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 
por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] 
movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd 
xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR 
[r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 
25 mov r9, QWORD PTR [rdx+200] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD 
PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 36 mov r9, QWORD PTR 
[rdx+288] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 38 mov r9, QWORD PTR [rdx+304] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, 
xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] add r9, 192 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 48 mov r9, QWORD PTR [rdx+384] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand 
xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 58 mov r9, QWORD PTR [rdx+464] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, 
xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 59 mov r9, QWORD PTR [rdx+472] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 
por     xmm4, xmm0
        por     xmm5, xmm1
        por     xmm6, xmm2
        por     xmm7, xmm3
        paddd   xmm13, xmm11
        movdqu  OWORD PTR [rcx], xmm4
        movdqu  OWORD PTR [rcx+16], xmm5
        movdqu  OWORD PTR [rcx+32], xmm6
        movdqu  OWORD PTR [rcx+48], xmm7
        ; END: 24-31
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        add     rsp, 128
        ret
sp_2048_get_from_table_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  *
;  * Register usage, as read from the code below:
;  *   rcx = a  (copied to r9),  rdx = m  (copied to r10),  r8 = mp
;  *   r14, r15, rdi, rsi  hold a[i]..a[i+3] across loop iterations
;  *   r12, r13            alternate as the running word a[i+k]
;  *   rbx                 zero constant inside the loop
;  *   rbp                 carry out of a[i+32], fed into the next iteration
;  *   r11                 loop counter (32 iterations)
;  *
;  * Reads and writes 64-bit words a[0..63] and reads m[0..31]; the reduced
;  * value is written to a[0..31].
;  * Uses mulx/pext (BMI2) and adcx/adox (ADX) instructions.
;  * Pushes r12-r15, rdi, rsi, rbx and rbp on entry and pops them in reverse
;  * order before returning.
;  */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_32 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        push    rbp
        mov     r9, rcx
        mov     r10, rdx
        ; rbp = carry between iterations, initially none
        xor     rbp, rbp
        ; i = 32
        mov     r11, 32
        ; Keep a[0]..a[3] in registers; the loop rotates this window so that
        ; r14 always holds a[i] at the top of an iteration.
        mov     r14, QWORD PTR [r9]
        mov     r15, QWORD PTR [r9+8]
        mov     rdi, QWORD PTR [r9+16]
        mov     rsi, QWORD PTR [r9+24]
        ; r9 = a + 128, so a[i+4]..a[i+32] are addressed below with
        ; displacements -96..+128 (presumably chosen to keep most
        ; displacements within a signed byte).
        add     r9, 128
        ; rbp was already cleared above and has not been written since
        xor     rbp, rbp
L_2048_mont_reduce_avx2_32_loop:
        ; mu = a[i] * mp
        mov     rdx, r14
        mov     r12, r14
        imul    rdx, r8
        ; rbx = 0; xor also clears CF and OF, so both carry chains start
        ; clean: adcx propagates through CF, adox through OF.
        xor     rbx, rbx
        ; a[i+0] += m[0] * mu
        ; The sum in r12 is never stored (r12 is reloaded at a[i+5]); only
        ; the carry out of this word is kept.
        mulx    rcx, rax, QWORD PTR [r10]
        mov     r14, r15
        adcx    r12, rax
        adox    r14, rcx
        ; a[i+1] += m[1] * mu
        mulx    rcx, rax, QWORD PTR [r10+8]
        mov     r15, rdi
        adcx    r14, rax
        adox    r15, rcx
        ; a[i+2] += m[2] * mu
        mulx    rcx, rax, QWORD PTR [r10+16]
        mov     rdi, rsi
        adcx    r15, rax
        adox    rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx    rcx, rax, QWORD PTR [r10+24]
        mov     rsi, QWORD PTR [r9+-96]
        adcx    rdi, rax
        adox    rsi, rcx
        ; a[i+4] += m[4] * mu
        ; (result stays in rsi; it is not written back to memory here)
        mulx    rcx, rax, QWORD PTR [r10+32]
        mov     r13, QWORD PTR [r9+-88]
        adcx    rsi, rax
        adox    r13, rcx
        ; a[i+5] += m[5] * mu
        ; From here on each step loads the next word, adds the low product
        ; half to the current word, the high half to the next word, and
        ; stores the completed current word.
        mulx    rcx, rax, QWORD PTR [r10+40]
        mov     r12, QWORD PTR [r9+-80]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-88], r13
        ; a[i+6] += m[6] * mu
        mulx    rcx, rax, QWORD PTR [r10+48]
        mov     r13, QWORD PTR [r9+-72]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-80], r12
        ; a[i+7] += m[7] * mu
        mulx    rcx, rax, QWORD PTR [r10+56]
        mov     r12, QWORD PTR [r9+-64]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-72], r13
        ; a[i+8] += m[8] * mu
        mulx    rcx, rax, QWORD PTR [r10+64]
        mov     r13, QWORD PTR [r9+-56]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-64], r12
        ; a[i+9] += m[9] * mu
        mulx    rcx, rax, QWORD PTR [r10+72]
        mov     r12, QWORD PTR [r9+-48]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-56], r13
        ; a[i+10] += m[10] * mu
        mulx    rcx, rax, QWORD PTR [r10+80]
        mov     r13, QWORD PTR [r9+-40]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-48], r12
        ; a[i+11] += m[11] * mu
        mulx    rcx, rax, QWORD PTR [r10+88]
        mov     r12, QWORD PTR [r9+-32]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-40], r13
        ; a[i+12] += m[12] * mu
        mulx    rcx, rax, QWORD PTR [r10+96]
        mov     r13, QWORD PTR [r9+-24]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-32], r12
        ; a[i+13] += m[13] * mu
        mulx    rcx, rax, QWORD PTR [r10+104]
        mov     r12, QWORD PTR [r9+-16]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-24], r13
        ; a[i+14] += m[14] * mu
        mulx    rcx, rax, QWORD PTR [r10+112]
        mov     r13, QWORD PTR [r9+-8]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-16], r12
        ; a[i+15] += m[15] * mu
        mulx    rcx, rax, QWORD PTR [r10+120]
        mov     r12, QWORD PTR [r9]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-8], r13
        ; a[i+16] += m[16] * mu
        mulx    rcx, rax, QWORD PTR [r10+128]
        mov     r13, QWORD PTR [r9+8]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9], r12
        ; a[i+17] += m[17] * mu
        mulx    rcx, rax, QWORD PTR [r10+136]
        mov     r12, QWORD PTR [r9+16]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+8], r13
        ; a[i+18] += m[18] * mu
        mulx    rcx, rax, QWORD PTR [r10+144]
        mov     r13, QWORD PTR [r9+24]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+16], r12
        ; a[i+19] += m[19] * mu
        mulx    rcx, rax, QWORD PTR [r10+152]
        mov     r12, QWORD PTR [r9+32]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+24], r13
        ; a[i+20] += m[20] * mu
        mulx    rcx, rax, QWORD PTR [r10+160]
        mov     r13, QWORD PTR [r9+40]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+32], r12
        ; a[i+21] += m[21] * mu
        mulx    rcx, rax, QWORD PTR [r10+168]
        mov     r12, QWORD PTR [r9+48]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+40], r13
        ; a[i+22] += m[22] * mu
        mulx    rcx, rax, QWORD PTR [r10+176]
        mov     r13, QWORD PTR [r9+56]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+48], r12
        ; a[i+23] += m[23] * mu
        mulx    rcx, rax, QWORD PTR [r10+184]
        mov     r12, QWORD PTR [r9+64]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+56], r13
        ; a[i+24] += m[24] * mu
        mulx    rcx, rax, QWORD PTR [r10+192]
        mov     r13, QWORD PTR [r9+72]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+64], r12
        ; a[i+25] += m[25] * mu
        mulx    rcx, rax, QWORD PTR [r10+200]
        mov     r12, QWORD PTR [r9+80]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+72], r13
        ; a[i+26] += m[26] * mu
        mulx    rcx, rax, QWORD PTR [r10+208]
        mov     r13, QWORD PTR [r9+88]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+80], r12
        ; a[i+27] += m[27] * mu
        mulx    rcx, rax, QWORD PTR [r10+216]
        mov     r12, QWORD PTR [r9+96]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+88], r13
        ; a[i+28] += m[28] * mu
        mulx    rcx, rax, QWORD PTR [r10+224]
        mov     r13, QWORD PTR [r9+104]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+96], r12
        ; a[i+29] += m[29] * mu
        mulx    rcx, rax, QWORD PTR [r10+232]
        mov     r12, QWORD PTR [r9+112]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+104], r13
        ; a[i+30] += m[30] * mu
        mulx    rcx, rax, QWORD PTR [r10+240]
        mov     r13, QWORD PTR [r9+120]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+112], r12
        ; a[i+31] += m[31] * mu
        mulx    rcx, rax, QWORD PTR [r10+248]
        mov     r12, QWORD PTR [r9+128]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+120], r13
        ; a[i+32] += carry saved from the previous iteration, then collect
        ; the new carry: rbp = 0 + OF + CF (mov does not modify flags).
        adcx    r12, rbp
        mov     rbp, rbx
        mov     QWORD PTR [r9+128], r12
        adox    rbp, rbx
        adcx    rbp, rbx
        ; a += 1
        add     r9, 8
        ; i -= 1
        sub     r11, 1
        jnz     L_2048_mont_reduce_avx2_32_loop
        ; r9 was a + 128 and advanced by 8 on each of the 32 iterations,
        ; so after this subtraction r9 = a + 256 (address of a[32]).
        sub     r9, 128
        ; Turn the final carry into a mask: 0 stays 0, 1 becomes all ones.
        neg     rbp
        ; r8 = &a[32] (source, upper half), r9 = &a[0] (destination)
        mov     r8, r9
        sub     r9, 256
        ; Masked subtraction: a[0..31] = a[32..63] - (m[0..31] & mask).
        ; pext with an all-ones mask returns the word unchanged and with a
        ; zero mask returns 0, so m is subtracted only when the carry was
        ; set, without a branch. Words 0..3 are taken from r14, r15, rdi,
        ; rsi because the loop keeps a[32..35] in registers and does not
        ; write them back. rax/rcx/rdx rotate roles; each result word is
        ; stored after the next pext and before the next sbb (mov and pext
        ; leave CF intact, so the borrow chain is preserved).
        ; word 0
        mov     rcx, QWORD PTR [r10]
        mov     rdx, r14
        pext    rcx, rcx, rbp
        sub     rdx, rcx
        ; word 1
        mov     rcx, QWORD PTR [r10+8]
        mov     rax, r15
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9], rdx
        sbb     rax, rcx
        ; word 2
        mov     rdx, QWORD PTR [r10+16]
        mov     rcx, rdi
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+8], rax
        sbb     rcx, rdx
        ; word 3
        mov     rax, QWORD PTR [r10+24]
        mov     rdx, rsi
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+16], rcx
        sbb     rdx, rax
        ; word 4
        mov     rcx, QWORD PTR [r10+32]
        mov     rax, QWORD PTR [r8+32]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+24], rdx
        sbb     rax, rcx
        ; word 5
        mov     rdx, QWORD PTR [r10+40]
        mov     rcx, QWORD PTR [r8+40]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+32], rax
        sbb     rcx, rdx
        ; word 6
        mov     rax, QWORD PTR [r10+48]
        mov     rdx, QWORD PTR [r8+48]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+40], rcx
        sbb     rdx, rax
        ; word 7
        mov     rcx, QWORD PTR [r10+56]
        mov     rax, QWORD PTR [r8+56]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+48], rdx
        sbb     rax, rcx
        ; word 8
        mov     rdx, QWORD PTR [r10+64]
        mov     rcx, QWORD PTR [r8+64]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+56], rax
        sbb     rcx, rdx
        ; word 9
        mov     rax, QWORD PTR [r10+72]
        mov     rdx, QWORD PTR [r8+72]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+64], rcx
        sbb     rdx, rax
        ; word 10
        mov     rcx, QWORD PTR [r10+80]
        mov     rax, QWORD PTR [r8+80]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+72], rdx
        sbb     rax, rcx
        ; word 11
        mov     rdx, QWORD PTR [r10+88]
        mov     rcx, QWORD PTR [r8+88]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+80], rax
        sbb     rcx, rdx
        ; word 12
        mov     rax, QWORD PTR [r10+96]
        mov     rdx, QWORD PTR [r8+96]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+88], rcx
        sbb     rdx, rax
        ; word 13
        mov     rcx, QWORD PTR [r10+104]
        mov     rax, QWORD PTR [r8+104]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+96], rdx
        sbb     rax, rcx
        ; word 14
        mov     rdx, QWORD PTR [r10+112]
        mov     rcx, QWORD PTR [r8+112]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+104], rax
        sbb     rcx, rdx
        ; word 15
        mov     rax, QWORD PTR [r10+120]
        mov     rdx, QWORD PTR [r8+120]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+112], rcx
        sbb     rdx, rax
        ; word 16
        mov     rcx, QWORD PTR [r10+128]
        mov     rax, QWORD PTR [r8+128]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+120], rdx
        sbb     rax, rcx
        ; word 17
        mov     rdx, QWORD PTR [r10+136]
        mov     rcx, QWORD PTR [r8+136]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+128], rax
        sbb     rcx, rdx
        ; word 18
        mov     rax, QWORD PTR [r10+144]
        mov     rdx, QWORD PTR [r8+144]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+136], rcx
        sbb     rdx, rax
        ; word 19
        mov     rcx, QWORD PTR [r10+152]
        mov     rax, QWORD PTR [r8+152]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+144], rdx
        sbb     rax, rcx
        ; word 20
        mov     rdx, QWORD PTR [r10+160]
        mov     rcx, QWORD PTR [r8+160]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+152], rax
        sbb     rcx, rdx
        ; word 21
        mov     rax, QWORD PTR [r10+168]
        mov     rdx, QWORD PTR [r8+168]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+160], rcx
        sbb     rdx, rax
        ; word 22
        mov     rcx, QWORD PTR [r10+176]
        mov     rax, QWORD PTR [r8+176]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+168], rdx
        sbb     rax, rcx
        ; word 23
        mov     rdx, QWORD PTR [r10+184]
        mov     rcx, QWORD PTR [r8+184]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+176], rax
        sbb     rcx, rdx
        ; word 24
        mov     rax, QWORD PTR [r10+192]
        mov     rdx, QWORD PTR [r8+192]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+184], rcx
        sbb     rdx, rax
        ; word 25
        mov     rcx, QWORD PTR [r10+200]
        mov     rax, QWORD PTR [r8+200]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+192], rdx
        sbb     rax, rcx
        ; word 26
        mov     rdx, QWORD PTR [r10+208]
        mov     rcx, QWORD PTR [r8+208]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+200], rax
        sbb     rcx, rdx
        ; word 27
        mov     rax, QWORD PTR [r10+216]
        mov     rdx, QWORD PTR [r8+216]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+208], rcx
        sbb     rdx, rax
        ; word 28
        mov     rcx, QWORD PTR [r10+224]
        mov     rax, QWORD PTR [r8+224]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+216], rdx
        sbb     rax, rcx
        ; word 29
        mov     rdx, QWORD PTR [r10+232]
        mov     rcx, QWORD PTR [r8+232]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+224], rax
        sbb     rcx, rdx
        ; word 30
        mov     rax, QWORD PTR [r10+240]
        mov     rdx, QWORD PTR [r8+240]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+232], rcx
        sbb     rdx, rax
        ; word 31
        mov     rcx, QWORD PTR [r10+248]
        mov     rax, QWORD PTR [r8+248]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+240], rdx
        sbb     rax, rcx
        mov     QWORD PTR [r9+248], rax
        ; Restore the saved registers in reverse push order
        pop     rbp
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_avx2_32 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_32 PROC
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        vpxor   ymm13, ymm13, ymm13
        vpermd  ymm10, ymm13,
ymm10 vpermd ymm11, ymm13, ymm11 ; START: 0-15 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR 
[r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] 
vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] vpcmpeqd ymm12, ymm13, ymm10 
vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; 
ENTRY: 19 mov r9, QWORD PTR [rdx+152] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor 
ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand 
ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, 
ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 36 mov r9, QWORD PTR [rdx+288] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 38 mov r9, QWORD PTR [rdx+304] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, 
YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, 
YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 48 mov 
r9, QWORD PTR [rdx+384] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 
vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 
vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 58 mov r9, QWORD PTR [rdx+464] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 59 mov r9, QWORD PTR [rdx+472] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand 
ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 0-15 ; START: 16-31 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand 
ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] 
vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD 
PTR [rdx+96] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, 
ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD 
PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 128 vpcmpeqd ymm12, ymm13, 
ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, 
ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 32 mov r9, QWORD PTR [rdx+256] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 33 mov r9, QWORD PTR [rdx+264] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 34 mov r9, QWORD PTR [rdx+272] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 35 mov r9, QWORD PTR [rdx+280] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand 
ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 36 mov r9, QWORD PTR [rdx+288] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 37 mov r9, QWORD PTR [rdx+296] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 38 mov r9, QWORD PTR [rdx+304] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 39 mov r9, QWORD PTR [rdx+312] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 40 mov r9, QWORD PTR [rdx+320] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] 
vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 41 mov r9, QWORD PTR [rdx+328] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 42 mov r9, QWORD PTR [rdx+336] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 43 mov r9, QWORD PTR [rdx+344] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 44 mov r9, QWORD PTR [rdx+352] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, 
ymm13, ymm11 ; ENTRY: 45 mov r9, QWORD PTR [rdx+360] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 46 mov r9, QWORD PTR [rdx+368] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 47 mov r9, QWORD PTR [rdx+376] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 48 mov r9, QWORD PTR [rdx+384] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 49 mov r9, QWORD PTR [rdx+392] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 
vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 50 mov r9, QWORD PTR [rdx+400] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 51 mov r9, QWORD PTR [rdx+408] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 52 mov r9, QWORD PTR [rdx+416] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 53 mov r9, QWORD PTR [rdx+424] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 54 mov r9, QWORD PTR [rdx+432] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, 
YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 55 mov r9, QWORD PTR [rdx+440] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 56 mov r9, QWORD PTR [rdx+448] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 57 mov r9, QWORD PTR [rdx+456] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 58 mov r9, QWORD PTR [rdx+464] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 59 mov r9, QWORD PTR 
[rdx+472] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 60 mov r9, QWORD PTR [rdx+480] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 61 mov r9, QWORD PTR [rdx+488] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 62 mov r9, QWORD PTR [rdx+496] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 63 mov r9, QWORD PTR [rdx+504] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, 
ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 ; END: 16-31 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_2048_get_from_table_avx2_32 ENDP _text ENDS ENDIF
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Computes r = a + (b AND m) over 16 limbs of 64 bits and returns the
; * carry out of the top limb (0 or 1) in rax.
; *
; * r  (rcx) A single precision number representing conditional add result.
; * a  (rdx) A single precision number to add with.
; * b  (r8)  A single precision number to add.
; * m  (r9)  Mask value to apply.
; *
; * Scratch: rax, r8, r10, r11, flags, and 128 bytes of stack that hold the
; * masked copy of b. The staged copy is not wiped before returning.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_add_16 PROC
        sub     rsp, 128
        mov     rax, 0
        ; Phase 1: stage (b AND m) on the stack, two limbs at a time.
        ; `and` rewrites the carry flag, so the masking is done up front
        ; instead of inside the add/adc chain below.
        ; limbs 0-1
        mov     r10, QWORD PTR [r8]
        mov     r11, QWORD PTR [r8+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp], r10
        mov     QWORD PTR [rsp+8], r11
        ; limbs 2-3
        mov     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [r8+24]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+16], r10
        mov     QWORD PTR [rsp+24], r11
        ; limbs 4-5
        mov     r10, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [r8+40]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+32], r10
        mov     QWORD PTR [rsp+40], r11
        ; limbs 6-7
        mov     r10, QWORD PTR [r8+48]
        mov     r11, QWORD PTR [r8+56]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+48], r10
        mov     QWORD PTR [rsp+56], r11
        ; limbs 8-9
        mov     r10, QWORD PTR [r8+64]
        mov     r11, QWORD PTR [r8+72]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+64], r10
        mov     QWORD PTR [rsp+72], r11
        ; limbs 10-11
        mov     r10, QWORD PTR [r8+80]
        mov     r11, QWORD PTR [r8+88]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+80], r10
        mov     QWORD PTR [rsp+88], r11
        ; limbs 12-13
        mov     r10, QWORD PTR [r8+96]
        mov     r11, QWORD PTR [r8+104]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+96], r10
        mov     QWORD PTR [rsp+104], r11
        ; limbs 14-15
        mov     r10, QWORD PTR [r8+112]
        mov     r11, QWORD PTR [r8+120]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+112], r10
        mov     QWORD PTR [rsp+120], r11
        ; Phase 2: r = a + staged copy. b has been read in full, so r8 is
        ; reused as the temporary for the staged limb. Only mov sits between
        ; the add/adc instructions, which keeps the carry flag live; each
        ; result limb is stored one step late, after the next limb's adc.
        ; limb 0
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        add     r10, r8
        ; limb 1
        mov     r11, QWORD PTR [rdx+8]
        mov     r8, QWORD PTR [rsp+8]
        adc     r11, r8
        mov     QWORD PTR [rcx], r10
        ; limb 2
        mov     r10, QWORD PTR [rdx+16]
        mov     r8, QWORD PTR [rsp+16]
        adc     r10, r8
        mov     QWORD PTR [rcx+8], r11
        ; limb 3
        mov     r11, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [rsp+24]
        adc     r11, r8
        mov     QWORD PTR [rcx+16], r10
        ; limb 4
        mov     r10, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rsp+32]
        adc     r10, r8
        mov     QWORD PTR [rcx+24], r11
        ; limb 5
        mov     r11, QWORD PTR [rdx+40]
        mov     r8, QWORD PTR [rsp+40]
        adc     r11, r8
        mov     QWORD PTR [rcx+32], r10
        ; limb 6
        mov     r10, QWORD PTR [rdx+48]
        mov     r8, QWORD PTR [rsp+48]
        adc     r10, r8
        mov     QWORD PTR [rcx+40], r11
        ; limb 7
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rsp+56]
        adc     r11, r8
        mov     QWORD PTR [rcx+48], r10
        ; limb 8
        mov     r10, QWORD PTR [rdx+64]
        mov     r8, QWORD PTR [rsp+64]
        adc     r10, r8
        mov     QWORD PTR [rcx+56], r11
        ; limb 9
        mov     r11, QWORD PTR [rdx+72]
        mov     r8, QWORD PTR [rsp+72]
        adc     r11, r8
        mov     QWORD PTR [rcx+64], r10
        ; limb 10
        mov     r10, QWORD PTR [rdx+80]
        mov     r8, QWORD PTR [rsp+80]
        adc     r10, r8
        mov     QWORD PTR [rcx+72], r11
        ; limb 11
        mov     r11, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [rsp+88]
        adc     r11, r8
        mov     QWORD PTR [rcx+80], r10
        ; limb 12
        mov     r10, QWORD PTR [rdx+96]
        mov     r8, QWORD PTR [rsp+96]
        adc     r10, r8
        mov     QWORD PTR [rcx+88], r11
        ; limb 13
        mov     r11, QWORD PTR [rdx+104]
        mov     r8, QWORD PTR [rsp+104]
        adc     r11, r8
        mov     QWORD PTR [rcx+96], r10
        ; limb 14
        mov     r10, QWORD PTR [rdx+112]
        mov     r8, QWORD PTR [rsp+112]
        adc     r10, r8
        mov     QWORD PTR [rcx+104], r11
        ; limb 15
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rsp+120]
        adc     r11, r8
        mov     QWORD PTR [rcx+112], r10
        mov     QWORD PTR [rcx+120], r11
        ; Fold the final carry into the return value while the flag is still
        ; valid; the stack release below rewrites the flags.
        adc     rax, 0
        add     rsp, 128
        ret
sp_2048_cond_add_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r  A single precision number representing conditional add result.
; * a  A single precision number to add with.
; * b  A single precision number to add.
; * m  Mask value to apply.
; */
; Register roles, as used by the instructions below:
;   rcx = r (16 qwords stored), rdx = a, r8 = b, r9 = m.
;   rax = carry out of the 16-word addition (0 or 1).
;   r10, r11, r12 rotate as scratch; r12 is saved and restored here.
; Each word of b is masked with "pext x, x, r9": with m = -1 the word passes
; through unchanged, with m = 0 it becomes 0.  mov and pext do not modify CF,
; so the add/adc chain stays intact although loads, masking and the store of
; the previous word are interleaved with it.
_text SEGMENT READONLY PARA
sp_2048_cond_add_avx2_16 PROC
        push    r12
        mov     rax, 0
        ; word 0: opens the carry chain with add
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        add     r10, r12
        ; word 1 (r[0] is stored while word 1 is in flight)
        mov     r12, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [rdx+8]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx], r10
        adc     r11, r12
        ; word 2
        mov     r10, QWORD PTR [r8+16]
        mov     r12, QWORD PTR [rdx+16]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+8], r11
        adc     r12, r10
        ; word 3
        mov     r11, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+24]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+16], r12
        adc     r10, r11
        ; word 4
        mov     r12, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [rdx+32]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+24], r10
        adc     r11, r12
        ; word 5
        mov     r10, QWORD PTR [r8+40]
        mov     r12, QWORD PTR [rdx+40]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+32], r11
        adc     r12, r10
        ; word 6
        mov     r11, QWORD PTR [r8+48]
        mov     r10, QWORD PTR [rdx+48]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+40], r12
        adc     r10, r11
        ; word 7
        mov     r12, QWORD PTR [r8+56]
        mov     r11, QWORD PTR [rdx+56]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+48], r10
        adc     r11, r12
        ; word 8
        mov     r10, QWORD PTR [r8+64]
        mov     r12, QWORD PTR [rdx+64]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+56], r11
        adc     r12, r10
        ; word 9
        mov     r11, QWORD PTR [r8+72]
        mov     r10, QWORD PTR [rdx+72]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+64], r12
        adc     r10, r11
        ; word 10
        mov     r12, QWORD PTR [r8+80]
        mov     r11, QWORD PTR [rdx+80]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+72], r10
        adc     r11, r12
        ; word 11
        mov     r10, QWORD PTR [r8+88]
        mov     r12, QWORD PTR [rdx+88]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+80], r11
        adc     r12, r10
        ; word 12
        mov     r11, QWORD PTR [r8+96]
        mov     r10, QWORD PTR [rdx+96]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+88], r12
        adc     r10, r11
        ; word 13
        mov     r12, QWORD PTR [r8+104]
        mov     r11, QWORD PTR [rdx+104]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+96], r10
        adc     r11, r12
        ; word 14
        mov     r10, QWORD PTR [r8+112]
        mov     r12, QWORD PTR [rdx+112]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+104], r11
        adc     r12, r10
        ; word 15
        mov     r11, QWORD PTR [r8+120]
        mov     r10, QWORD PTR [rdx+120]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+112], r12
        adc     r10, r11
        mov     QWORD PTR [rcx+120], r10
        ; fold the final carry into the return value (rax was 0)
        adc     rax, 0
        pop     r12
        ret
sp_2048_cond_add_avx2_16 ENDP
_text ENDS
ENDIF
; /* Shift number left by
n bit. (r = a << n)
; *
; * r  Result of left shift by n.
; * a  Number to shift.
; * n  Amount to shift.
; *
; * As coded: rcx = r, rdx = a, r8b = n.  The count is moved into cl, so r is
; * kept in rax for the whole routine.  shld/shl on 64-bit operands use only
; * the low 6 bits of cl.  33 qwords are stored: [rax+256] receives the bits
; * shifted out of the top source word.  Words are handled from the most
; * significant end downwards, and every source qword is loaded before the
; * destination qword at the same offset is stored.
; */
_text SEGMENT READONLY PARA
sp_2048_lshift_32 PROC
        push    r12
        push    r13
        mov     rax, rcx
        mov     cl, r8b
        mov     r12, 0
        ; source words 27..31 -> result words 28..32 (r13 = word 27 is only
        ; a bit donor here; it is finished in the next group)
        mov     r13, QWORD PTR [rdx+216]
        mov     r8, QWORD PTR [rdx+224]
        mov     r9, QWORD PTR [rdx+232]
        mov     r10, QWORD PTR [rdx+240]
        mov     r11, QWORD PTR [rdx+248]
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+224], r8
        mov     QWORD PTR [rax+232], r9
        mov     QWORD PTR [rax+240], r10
        mov     QWORD PTR [rax+248], r11
        mov     QWORD PTR [rax+256], r12
        ; result words 24..27 (r11 = word 23 carried into the next group)
        mov     r11, QWORD PTR [rdx+184]
        mov     r8, QWORD PTR [rdx+192]
        mov     r9, QWORD PTR [rdx+200]
        mov     r10, QWORD PTR [rdx+208]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+192], r8
        mov     QWORD PTR [rax+200], r9
        mov     QWORD PTR [rax+208], r10
        mov     QWORD PTR [rax+216], r13
        ; result words 20..23
        mov     r13, QWORD PTR [rdx+152]
        mov     r8, QWORD PTR [rdx+160]
        mov     r9, QWORD PTR [rdx+168]
        mov     r10, QWORD PTR [rdx+176]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+160], r8
        mov     QWORD PTR [rax+168], r9
        mov     QWORD PTR [rax+176], r10
        mov     QWORD PTR [rax+184], r11
        ; result words 16..19
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rdx+128]
        mov     r9, QWORD PTR [rdx+136]
        mov     r10, QWORD PTR [rdx+144]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+128], r8
        mov     QWORD PTR [rax+136], r9
        mov     QWORD PTR [rax+144], r10
        mov     QWORD PTR [rax+152], r13
        ; result words 12..15
        mov     r13, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [rdx+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     r10, QWORD PTR [rdx+112]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+96], r8
        mov     QWORD PTR [rax+104], r9
        mov     QWORD PTR [rax+112], r10
        mov     QWORD PTR [rax+120], r11
        ; result words 8..11
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rdx+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     r10, QWORD PTR [rdx+80]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+64], r8
        mov     QWORD PTR [rax+72], r9
        mov     QWORD PTR [rax+80], r10
        mov     QWORD PTR [rax+88], r13
        ; result words 4..7
        mov     r13, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [rdx+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+32], r8
        mov     QWORD PTR [rax+40], r9
        mov     QWORD PTR [rax+48], r10
        mov     QWORD PTR [rax+56], r11
        ; result words 0..3; the lowest word has no lower neighbour, so it
        ; takes a plain shl that shifts in zeros
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shl     r8, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r13
        pop     r13
        pop     r12
        ret
sp_2048_lshift_32 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_3072
IFNDEF WOLFSSL_SP_NO_3072
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert
; * a     Byte array.
; * n     Number of bytes in array to read.
; *
; * As coded: rcx = r, r8 = a, r9 = n; rdx (size) is never referenced and
; * the zero-fill limit is the fixed address r + 384.
; * r11 starts at a + n and walks backwards, so the last bytes of the array
; * become the lowest qwords of r.  r13 is a zero register.
; * NOTE(review): r9 is consumed as a full 64-bit signed value (add / cmp+jg).
; * If the C prototype declares n as a 32-bit int, the routine relies on the
; * caller having extended it - TODO confirm against the declaration.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_bswap PROC
        push    r12
        push    r13
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9
        add     r12, 384
        xor     r13, r13
        jmp     L_3072_from_bin_bswap_64_end
        ; 64 bytes (8 qwords) per iteration while more than 63 bytes remain
L_3072_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_bswap_64_start
        jmp     L_3072_from_bin_bswap_8_end
        ; one qword per iteration while more than 7 bytes remain
L_3072_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_bswap_8_start
        cmp     r9, r13
        je      L_3072_from_bin_bswap_hi_end
        ; 1..7 leading bytes remain at the start of the array (r8 still
        ; points at a): accumulate them big-endian into r10
        mov     r10, r13
        mov     rax, r13
L_3072_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_bswap_hi_end:
        cmp     rcx, r12
        jge     L_3072_from_bin_bswap_zero_end
        ; zero the remaining qwords up to r + 384
L_3072_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_bswap_zero_start
L_3072_from_bin_bswap_zero_end:
        pop     r13
        pop     r12
        ret
sp_3072_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert
; * a     Byte array.
; * n     Number of bytes in array to read.
; *
; * As coded: rcx = r, r8 = a, r9 = n; rdx (size) is never referenced and
; * the zero-fill limit is the fixed address r + 384.
; * Same structure as the bswap variant, with each load + bswap pair
; * replaced by a single movbe load and immediate zeros instead of a zero
; * register.
; * NOTE(review): r9 is consumed as a full 64-bit signed value; if n is a
; * 32-bit int in the C prototype the caller must have extended it - TODO
; * confirm.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_movbe PROC
        push    r12
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9
        add     r12, 384
        jmp     L_3072_from_bin_movbe_64_end
        ; 64 bytes (8 qwords) per iteration while more than 63 bytes remain
L_3072_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_movbe_64_start
        jmp     L_3072_from_bin_movbe_8_end
        ; one qword per iteration while more than 7 bytes remain
L_3072_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_movbe_8_start
        cmp     r9, 0
        je      L_3072_from_bin_movbe_hi_end
        ; 1..7 leading bytes remain at the start of the array: accumulate
        ; them big-endian into r10
        mov     r10, 0
        mov     rax, 0
L_3072_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_movbe_hi_end:
        cmp     rcx, r12
        jge     L_3072_from_bin_movbe_zero_end
        ; zero the remaining qwords up to r + 384
L_3072_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_movbe_zero_start
L_3072_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_3072_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 384
; * Uses the bswap instruction.
; *
; * r  A single precision integer.
; * a  Byte array.
; *
; * As coded: rcx = r (48 qwords read), rdx = a (384 bytes written);
; * rax and r8 are scratch.  Qwords are taken from the top of r downwards,
; * byte-swapped and stored at increasing offsets of a, two per step.
; */
_text SEGMENT READONLY PARA
sp_3072_to_bin_bswap_48 PROC
        mov     rax, QWORD PTR [rcx+376]
        mov     r8, QWORD PTR [rcx+368]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx], rax
        mov     QWORD PTR [rdx+8], r8
        mov     rax, QWORD PTR [rcx+360]
        mov     r8, QWORD PTR [rcx+352]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+16], rax
        mov     QWORD PTR [rdx+24], r8
        mov     rax, QWORD PTR [rcx+344]
        mov     r8, QWORD PTR [rcx+336]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+32], rax
        mov     QWORD PTR [rdx+40], r8
        mov     rax, QWORD PTR [rcx+328]
        mov     r8, QWORD PTR [rcx+320]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+48], rax
        mov     QWORD PTR [rdx+56], r8
        mov     rax, QWORD PTR [rcx+312]
        mov     r8, QWORD PTR [rcx+304]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+64], rax
        mov     QWORD PTR [rdx+72], r8
        mov     rax, QWORD PTR [rcx+296]
        mov     r8, QWORD PTR [rcx+288]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+80], rax
        mov     QWORD PTR [rdx+88], r8
        mov     rax, QWORD PTR [rcx+280]
        mov     r8, QWORD PTR [rcx+272]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+96], rax
        mov     QWORD PTR [rdx+104], r8
        mov     rax, QWORD PTR [rcx+264]
        mov     r8, QWORD PTR [rcx+256]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+112], rax
        mov     QWORD PTR [rdx+120], r8
        mov     rax, QWORD PTR [rcx+248]
        mov     r8, QWORD PTR [rcx+240]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+128], rax
        mov     QWORD PTR [rdx+136], r8
        mov     rax, QWORD PTR [rcx+232]
        mov     r8, QWORD PTR [rcx+224]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+144], rax
        mov     QWORD PTR [rdx+152], r8
        mov     rax, QWORD PTR [rcx+216]
        mov     r8, QWORD PTR [rcx+208]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+160], rax
        mov     QWORD PTR [rdx+168], r8
        mov     rax, QWORD PTR [rcx+200]
        mov     r8, QWORD PTR [rcx+192]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+176], rax
        mov     QWORD PTR [rdx+184], r8
        mov     rax, QWORD PTR [rcx+184]
        mov     r8, QWORD PTR [rcx+176]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+192], rax
        mov     QWORD PTR [rdx+200], r8
        mov     rax, QWORD PTR [rcx+168]
        mov     r8, QWORD PTR [rcx+160]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+208], rax
        mov     QWORD PTR [rdx+216], r8
        mov     rax, QWORD PTR [rcx+152]
        mov     r8, QWORD PTR [rcx+144]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+224], rax
        mov     QWORD PTR [rdx+232], r8
        mov     rax, QWORD PTR [rcx+136]
        mov     r8, QWORD PTR [rcx+128]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+240], rax
        mov     QWORD PTR [rdx+248], r8
        mov     rax, QWORD PTR [rcx+120]
        mov     r8, QWORD PTR [rcx+112]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+256], rax
        mov     QWORD PTR [rdx+264], r8
        mov     rax, QWORD PTR [rcx+104]
        mov     r8, QWORD PTR [rcx+96]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+272], rax
        mov     QWORD PTR [rdx+280], r8
        mov     rax, QWORD PTR [rcx+88]
        mov     r8, QWORD PTR [rcx+80]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+288], rax
        mov     QWORD PTR [rdx+296], r8
        mov     rax, QWORD PTR [rcx+72]
        mov     r8, QWORD PTR [rcx+64]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+304], rax
        mov     QWORD PTR [rdx+312], r8
        mov     rax, QWORD PTR [rcx+56]
        mov     r8, QWORD PTR [rcx+48]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+320], rax
        mov     QWORD PTR [rdx+328], r8
        mov     rax, QWORD PTR [rcx+40]
        mov     r8, QWORD PTR [rcx+32]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+336], rax
        mov     QWORD PTR [rdx+344], r8
        mov     rax, QWORD PTR [rcx+24]
        mov     r8, QWORD PTR [rcx+16]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+352], rax
        mov     QWORD PTR [rdx+360], r8
        mov     rax, QWORD PTR [rcx+8]
        mov     r8, QWORD PTR [rcx]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+368], rax
        mov     QWORD PTR [rdx+376], r8
        ret
sp_3072_to_bin_bswap_48 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 384
; * Uses the movbe instruction which is optional.
; *
; * r  A single precision integer.
; * a  Byte array.
; */
; Register roles, as used below: rcx = r (48 qwords read), rdx = a (384 bytes
; written); rax and r8 are scratch.
; The 24 two-qword steps are generated at assembly time.  Each step loads the
; next two qwords from the top of r downwards with movbe (byte-reversing
; load) and stores them at the next two increasing offsets of a.  The load
; and store order inside a step and across steps matches the fully unrolled
; form.
_text SEGMENT READONLY PARA
sp_3072_to_bin_movbe_48 PROC
        ; assembly-time cursors: byte offset of the upper qword of the pair
        ; in r, and byte offset of the first output qword in a
        sp_3072_tbm48_src = 376
        sp_3072_tbm48_dst = 0
        REPT 24
        movbe   rax, QWORD PTR [rcx+sp_3072_tbm48_src]
        movbe   r8, QWORD PTR [rcx+sp_3072_tbm48_src-8]
        mov     QWORD PTR [rdx+sp_3072_tbm48_dst], rax
        mov     QWORD PTR [rdx+sp_3072_tbm48_dst+8], r8
        sp_3072_tbm48_src = sp_3072_tbm48_src - 16
        sp_3072_tbm48_dst = sp_3072_tbm48_dst + 16
        ENDM
        ret
sp_3072_to_bin_movbe_48 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */ _text SEGMENT READONLY PARA sp_3072_mul_12 PROC push r12 mov r9, rdx sub rsp, 96 ; A[0] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9] xor r12, r12 mov QWORD PTR [rsp], rax mov r11, rdx ; A[0] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+8], r11 ; A[0] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+16], r12 ; A[0] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+24], r10 ; A[0] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+32], r11 ; A[0] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+16] 
add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+40], r12 ; A[0] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+48], r10 ; A[0] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+56], r11 ; A[0] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9] xor r11, r11 
add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+64], r12 ; A[0] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+72], r10 ; A[0] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc 
r10, 0 ; A[1] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+80], r11 ; A[0] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR 
[r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+88], r12 ; A[1] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+8] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+96], r10 ; A[2] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+16] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+56] add 
r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+104], r11 ; A[3] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+24] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+112], r12 ; A[4] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+32] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+72] add r10, 
rax adc r11, rdx adc r12, 0 ; A[10] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+120], r10 ; A[5] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+40] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+128], r11 ; A[6] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+48] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+136], r12 ; A[7] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+56] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[8] mov rax, QWORD PTR 
[r8+64] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+144], r10 ; A[8] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+64] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+152], r11 ; A[9] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+72] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+160], r12 ; A[10] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+80] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+168], r10 ; A[11] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx mov QWORD PTR [rcx+176], r11 mov QWORD PTR [rcx+184], r12 mov rax, QWORD PTR [rsp] mov rdx, QWORD PTR [rsp+8] mov r10, QWORD PTR [rsp+16] mov r11, QWORD PTR [rsp+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], rdx mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rsp+32] mov rdx, QWORD PTR [rsp+40] mov r10, QWORD PTR [rsp+48] mov r11, QWORD PTR [rsp+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], rdx mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rsp+64] mov rdx, QWORD PTR [rsp+72] mov r10, QWORD PTR [rsp+80] mov r11, QWORD PTR [rsp+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], rdx mov 
QWORD PTR [rcx+80], r10
        mov QWORD PTR [rcx+88], r11
        add rsp, 96
        pop r12
        ret
sp_3072_mul_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; * Row-by-row 12x12 word multiply built on mulx, with two independent
; * carry chains: adcx (carry flag) for the low product halves and
; * adox (overflow flag) for the high product halves.
; *
; * r  Result of multiplication: 24 64-bit words. (rcx on entry)
; * a  First number to multiply: 12 64-bit words. (rdx on entry)
; * b  Second number to multiply: 12 64-bit words. (r8 on entry)
; *
; * r may be the same pointer as a or b: the low 12 result words are then
; * accumulated in a 96-byte stack buffer and copied to r after the last
; * row. Result words 12..23 are always written directly to r.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_12 PROC
        ; Save the non-volatile registers used below.
        push rbx
        push rbp
        push r12
        push r13
        push r14
        ; rbp = b, r8 = r, r9 = a. rdx is freed because mulx uses it as the
        ; implicit multiplicand; rcx is freed for use as a scratch register.
        mov rbp, r8
        mov r8, rcx
        mov r9, rdx
        ; 96-byte stack buffer for the low half of the result.
        sub rsp, 96
        ; rbx = where result words 0..11 are accumulated:
        ; r when r != a and r != b, otherwise the stack buffer.
        cmp r9, r8
        mov rbx, rsp
        cmovne rbx, r8
        cmp rbp, r8
        cmove rbx, rsp
        ; r8 = &r[12]: base for result words 12..23.
        add r8, 96
        ; r14 = 0 for the rest of the function; the xor also clears CF and OF
        ; so both carry chains start empty.
        xor r14, r14
        ; Row 0: A[0] * B[0..11] -> result words 0..12 (nothing to add yet,
        ; so only the adcx chain is needed).
        mov rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx r12, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx], r10
        adcx r11, rax
        mov QWORD PTR [rbx+8], r11
        ; A[0] * B[2]
        mulx r10, rax, QWORD PTR [rbp+16]
        adcx r12, rax
        ; A[0] * B[3]
        mulx r11, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+16], r12
        adcx r10, rax
        mov QWORD PTR [rbx+24], r10
        ; A[0] * B[4]
        mulx r12, rax, QWORD PTR [rbp+32]
        adcx r11, rax
        ; A[0] * B[5]
        mulx r10, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+32], r11
        adcx r12, rax
        mov QWORD PTR [rbx+40], r12
        ; A[0] * B[6]
        mulx r11, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        ; A[0] * B[7]
        mulx r12, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+48], r10
        adcx r11, rax
        mov QWORD PTR [rbx+56], r11
        ; A[0] * B[8]
        mulx r10, rax, QWORD PTR [rbp+64]
        adcx r12, rax
        ; A[0] * B[9]
        mulx r11, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [rbx+64], r12
        adcx r10, rax
        mov QWORD PTR [rbx+72], r10
        ; A[0] * B[10]
        mulx r12, rax, QWORD PTR [rbp+80]
        adcx r11, rax
        ; A[0] * B[11]
        mulx r10, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        ; Fold the last carry into the top word; r13 = carry out of this row.
        adcx r10, r14
        mov r13, r14
        adcx r13, r14
        mov QWORD PTR [rbx+88], r12
        mov QWORD PTR [r8], r10
        ; Row 1: add A[1] * B[0..11] into result words 1..13.
        ; Low halves (rax) go through the adcx/CF chain, high halves (rcx)
        ; through the adox/OF chain, one word higher.
        mov rdx, QWORD PTR [r9+8]
        mov r11, QWORD PTR [rbx+8]
        mov r12, QWORD PTR [rbx+16]
        mov r10, QWORD PTR [rbx+24]
        ; A[1] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+8], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+16], r12
        mov r11, QWORD PTR [rbx+32]
        mov r12, QWORD PTR [rbx+40]
        ; A[1] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+32], r11
        mov r10, QWORD PTR [rbx+48]
        mov r11, QWORD PTR [rbx+56]
        ; A[1] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r12, rax
        adox r10, rcx
        ; A[1] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+40], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+48], r10
        mov r12, QWORD PTR [rbx+64]
        mov r10, QWORD PTR [rbx+72]
        ; A[1] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+56], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+64], r12
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        ; A[1] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [rbx+72], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+80], r11
        mov r10, QWORD PTR [r8]
        ; A[1] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r12, rax
        adox r10, rcx
        ; A[1] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [rbx+88], r12
        mov r11, r14
        adcx r10, rax
        adox r11, rcx
        ; New top word also takes the carry left by the previous row (r13);
        ; then r13 = OF + CF, the carry handed to the next row.
        adcx r11, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8], r10
        mov QWORD PTR [r8+8], r11
        ; Row 2: add A[2] * B[0..11] into result words 2..14.
        mov rdx, QWORD PTR [r9+16]
        mov r12, QWORD PTR [rbx+16]
        mov r10, QWORD PTR [rbx+24]
        mov r11, QWORD PTR [rbx+32]
        ; A[2] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r10, rcx
        ; A[2] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+16], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+24], r10
        mov r12, QWORD PTR [rbx+40]
        mov r10, QWORD PTR [rbx+48]
        ; A[2] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+32], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+40], r12
        mov r11, QWORD PTR [rbx+56]
        mov r12, QWORD PTR [rbx+64]
        ; A[2] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r10, rax
        adox r11, rcx
        ; A[2] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+48], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+56], r11
        mov r10, QWORD PTR [rbx+72]
        mov r11, QWORD PTR [rbx+80]
        ; A[2] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r10, rcx
        ; A[2] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+64], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+72], r10
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        ; A[2] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+88], r12
        mov r11, QWORD PTR [r8+8]
        ; A[2] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r10, rax
        adox r11, rcx
        ; A[2] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8], r10
        mov r12, r14
        adcx r11, rax
        adox r12, rcx
        adcx r12, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r12
        ; Row 3: add A[3] * B[0..11] into result words 3..15.
        mov rdx, QWORD PTR [r9+24]
        mov r10, QWORD PTR [rbx+24]
        mov r11, QWORD PTR [rbx+32]
        mov r12, QWORD PTR [rbx+40]
        ; A[3] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[3] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+32], r11
        mov r10, QWORD PTR [rbx+48]
        mov r11, QWORD PTR [rbx+56]
        ; A[3] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r12, rax
        adox r10, rcx
        ; A[3] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+40], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+48], r10
        mov r12, QWORD PTR [rbx+64]
        mov r10, QWORD PTR [rbx+72]
        ; A[3] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r11, rax
        adox r12, rcx
        ; A[3] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+56], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+64], r12
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        ; A[3] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[3] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+72], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+80], r11
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[3] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r12, rax
        adox r10, rcx
        ; A[3] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [rbx+88], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        ; A[3] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r11, rax
        adox r12, rcx
        ; A[3] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+8], r11
        mov r10, r14
        adcx r12, rax
        adox r10, rcx
        adcx r10, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+16], r12
        mov QWORD PTR [r8+24], r10
        ; Row 4: add A[4] * B[0..11] into result words 4..16.
        mov rdx, QWORD PTR [r9+32]
        mov r11, QWORD PTR [rbx+32]
        mov r12, QWORD PTR [rbx+40]
        mov r10, QWORD PTR [rbx+48]
        ; A[4] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[4] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+32], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+40], r12
        mov r11, QWORD PTR [rbx+56]
        mov r12, QWORD PTR [rbx+64]
        ; A[4] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+48], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+56], r11
        mov r10, QWORD PTR [rbx+72]
        mov r11, QWORD PTR [rbx+80]
        ; A[4] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r12, rax
        adox r10, rcx
        ; A[4] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+64], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+72], r10
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        ; A[4] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[4] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+88], r12
        mov r11, QWORD PTR [r8+8]
        mov r12, QWORD PTR [r8+16]
        ; A[4] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+24]
        ; A[4] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r12, rax
        adox r10, rcx
        ; A[4] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+16], r12
        mov r11, r14
        adcx r10, rax
        adox r11, rcx
        adcx r11, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        ; Row 5: add A[5] * B[0..11] into result words 5..17.
        mov rdx, QWORD PTR [r9+40]
        mov r12, QWORD PTR [rbx+40]
        mov r10, QWORD PTR [rbx+48]
        mov r11, QWORD PTR [rbx+56]
        ; A[5] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r10, rcx
        ; A[5] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+40], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+48], r10
        mov r12, QWORD PTR [rbx+64]
        mov r10, QWORD PTR [rbx+72]
        ; A[5] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+56], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+64], r12
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        ; A[5] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+72], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+80], r11
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[5] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r10, rcx
        ; A[5] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+88], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[5] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+8], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        ; A[5] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+24], r10
        mov r12, r14
        adcx r11, rax
        adox r12, rcx
        adcx r12, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+32], r11
        mov QWORD PTR [r8+40], r12
        ; Row 6: add A[6] * B[0..11] into result words 6..18.
        mov rdx, QWORD PTR [r9+48]
        mov r10, QWORD PTR [rbx+48]
        mov r11, QWORD PTR [rbx+56]
        mov r12, QWORD PTR [rbx+64]
        ; A[6] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[6] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+48], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+56], r11
        mov r10, QWORD PTR [rbx+72]
        mov r11, QWORD PTR [rbx+80]
        ; A[6] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r12, rax
        adox r10, rcx
        ; A[6] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+64], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+72], r10
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        ; A[6] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+88], r12
        mov r11, QWORD PTR [r8+8]
        mov r12, QWORD PTR [r8+16]
        ; A[6] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[6] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[6] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r12, rax
        adox r10, rcx
        ; A[6] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+16], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r10
        mov r12, QWORD PTR [r8+40]
        ; A[6] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+32], r11
        mov r10, r14
        adcx r12, rax
        adox r10, rcx
        adcx r10, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+40], r12
        mov QWORD PTR [r8+48], r10
        ; Row 7: add A[7] * B[0..11] into result words 7..19.
        mov rdx, QWORD PTR [r9+56]
        mov r11, QWORD PTR [rbx+56]
        mov r12, QWORD PTR [rbx+64]
        mov r10, QWORD PTR [rbx+72]
        ; A[7] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[7] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+56], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+64], r12
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        ; A[7] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r10, rax
        adox r11, rcx
        ; A[7] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+72], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+80], r11
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[7] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r12, rax
        adox r10, rcx
        ; A[7] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+88], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[7] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[7] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+8], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[7] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r10, rax
        adox r11, rcx
        ; A[7] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+48]
        ; A[7] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r12, rax
        adox r10, rcx
        ; A[7] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+40], r12
        mov r11, r14
        adcx r10, rax
        adox r11, rcx
        adcx r11, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+48], r10
        mov QWORD PTR [r8+56], r11
        ; Row 8: add A[8] * B[0..11] into result words 8..20.
        mov rdx, QWORD PTR [r9+64]
        mov r12, QWORD PTR [rbx+64]
        mov r10, QWORD PTR [rbx+72]
        mov r11, QWORD PTR [rbx+80]
        ; A[8] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r10, rcx
        ; A[8] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+64], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+72], r10
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        ; A[8] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r11, rax
        adox r12, rcx
        ; A[8] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+88], r12
        mov r11, QWORD PTR [r8+8]
        mov r12, QWORD PTR [r8+16]
        ; A[8] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[8] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r10, rcx
        ; A[8] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+16], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r10
        mov r12, QWORD PTR [r8+40]
        mov r10, QWORD PTR [r8+48]
        ; A[8] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r11, rax
        adox r12, rcx
        ; A[8] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+32], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+40], r12
        mov r11, QWORD PTR [r8+56]
        ; A[8] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+48], r10
        mov r12, r14
        adcx r11, rax
        adox r12, rcx
        adcx r12, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+56], r11
        mov QWORD PTR [r8+64], r12
        ; Row 9: add A[9] * B[0..11] into result words 9..21.
        mov rdx, QWORD PTR [r9+72]
        mov r10, QWORD PTR [rbx+72]
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        ; A[9] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[9] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+72], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+80], r11
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[9] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r12, rax
        adox r10, rcx
        ; A[9] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [rbx+88], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[9] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r11, rax
        adox r12, rcx
        ; A[9] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+8], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[9] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[9] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[9] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r12, rax
        adox r10, rcx
        ; A[9] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+40], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+48], r10
        mov r12, QWORD PTR [r8+64]
        ; A[9] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r11, rax
        adox r12, rcx
        ; A[9] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+56], r11
        mov r10, r14
        adcx r12, rax
        adox r10, rcx
        adcx r10, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+64], r12
        mov QWORD PTR [r8+72], r10
        ; Row 10: add A[10] * B[0..11] into result words 10..22.
        mov rdx, QWORD PTR [r9+80]
        mov r11, QWORD PTR [rbx+80]
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        ; A[10] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[10] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+80], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbx+88], r12
        mov r11, QWORD PTR [r8+8]
        mov r12, QWORD PTR [r8+16]
        ; A[10] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r10, rax
        adox r11, rcx
        ; A[10] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [r8], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[10] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r12, rax
        adox r10, rcx
        ; A[10] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+16], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r10
        mov r12, QWORD PTR [r8+40]
        mov r10, QWORD PTR [r8+48]
        ; A[10] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[10] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+32], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+40], r12
        mov r11, QWORD PTR [r8+56]
        mov r12, QWORD PTR [r8+64]
        ; A[10] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r10, rax
        adox r11, rcx
        ; A[10] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+48], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+56], r11
        mov r10, QWORD PTR [r8+72]
        ; A[10] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r12, rax
        adox r10, rcx
        ; A[10] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+64], r12
        mov r11, r14
        adcx r10, rax
        adox r11, rcx
        adcx r11, r13
        mov r13, r14
        adox r13, r14
        adcx r13, r14
        mov QWORD PTR [r8+72], r10
        mov QWORD PTR [r8+80], r11
        ; Row 11 (last): add A[11] * B[0..11] into result words 11..23.
        mov rdx, QWORD PTR [r9+88]
        mov r12, QWORD PTR [rbx+88]
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[11] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r10, rcx
        ; A[11] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+88], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[11] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        adcx r11, rax
        adox r12, rcx
        ; A[11] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        mov QWORD PTR [r8+8], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[11] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        adcx r10, rax
        adox r11, rcx
        ; A[11] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[11] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r10, rcx
        ; A[11] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+40], r12
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+48], r10
        mov r12, QWORD PTR [r8+64]
        mov r10, QWORD PTR [r8+72]
        ; A[11] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        adcx r11, rax
        adox r12, rcx
        ; A[11] * B[9]
        mulx rcx, rax, QWORD PTR [rbp+72]
        mov QWORD PTR [r8+56], r11
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+64], r12
        mov r11, QWORD PTR [r8+80]
        ; A[11] * B[10]
        mulx rcx, rax, QWORD PTR [rbp+80]
        adcx r10, rax
adox r11, rcx
        ; A[11] * B[11]
        mulx rcx, rax, QWORD PTR [rbp+88]
        mov QWORD PTR [r8+72], r10
        mov r12, r14
        adcx r11, rax
        adox r12, rcx
        ; Top word of the product also absorbs the carry left by the previous row.
        adcx r12, r13
        mov QWORD PTR [r8+80], r11
        mov QWORD PTR [r8+88], r12
        ; r8 back to the start of r. The low 12 words only need copying when
        ; they were built in the stack buffer, i.e. when r == a or r == b.
        sub r8, 96
        cmp r9, r8
        je L_start_3072_mul_avx2_12
        cmp rbp, r8
        jne L_end_3072_mul_avx2_12
L_start_3072_mul_avx2_12:
        ; Copy 96 bytes from the accumulation buffer (rbx) to r, 16 at a time.
        vmovdqu xmm0, OWORD PTR [rbx]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+16]
        vmovups OWORD PTR [r8+16], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+32]
        vmovups OWORD PTR [r8+32], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+48]
        vmovups OWORD PTR [r8+48], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+64]
        vmovups OWORD PTR [r8+64], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+80]
        vmovups OWORD PTR [r8+80], xmm0
L_end_3072_mul_avx2_12:
        ; Release the stack buffer and restore the saved registers.
        add rsp, 96
        pop r14
        pop r13
        pop r12
        pop rbp
        pop rbx
        ret
sp_3072_mul_avx2_12 ENDP
_text ENDS
ENDIF
; /* 12-word addition with carry out. (r = a + b)
; *
; * r  Destination, 12 64-bit words. (rcx)
; * a  First addend, 12 64-bit words. (rdx)
; * b  Second addend, 12 64-bit words. (r8)
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; * Scratch registers: rax, r9, r10.
; */
_text SEGMENT READONLY PARA
sp_3072_add_12 PROC
        ; rax = 0 up front so the final adc leaves just the carry in it.
        xor eax, eax
        ; Word 0 starts the carry chain; each following word of a is loaded
        ; before the previous sum is stored, then b is added with carry.
        mov r10, QWORD PTR [rdx]
        add r10, QWORD PTR [r8]
        mov r9, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r10
        adc r9, QWORD PTR [r8+8]
        mov r10, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r9
        adc r10, QWORD PTR [r8+16]
        mov r9, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r10
        adc r9, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+32]
        mov QWORD PTR [rcx+24], r9
        adc r10, QWORD PTR [r8+32]
        mov r9, QWORD PTR [rdx+40]
        mov QWORD PTR [rcx+32], r10
        adc r9, QWORD PTR [r8+40]
        mov r10, QWORD PTR [rdx+48]
        mov QWORD PTR [rcx+40], r9
        adc r10, QWORD PTR [r8+48]
        mov r9, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+48], r10
        adc r9, QWORD PTR [r8+56]
        mov r10, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+56], r9
        adc r10, QWORD PTR [r8+64]
        mov r9, QWORD PTR [rdx+72]
        mov QWORD PTR [rcx+64], r10
        adc r9, QWORD PTR [r8+72]
        mov r10, QWORD PTR [rdx+80]
        mov QWORD PTR [rcx+72], r9
        adc r10, QWORD PTR [r8+80]
        mov r9, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+80], r10
        adc r9, QWORD PTR [r8+88]
        mov QWORD PTR [rcx+88], r9
        ; rax = carry out of word 11.
        adc rax, 0
        ret
sp_3072_add_12 ENDP
_text ENDS
; /* 24-word in-place subtraction with borrow out. (a -= b)
; *
; * a  Minuend and destination, 24 64-bit words. (rcx)
; * b  Subtrahend, 24 64-bit words. (rdx)
; *
; * Returns 0 in rax when there is no borrow out of the top word,
; * all ones (-1) when there is.
; * Scratch registers: rax, r8, r9.
; */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_24 PROC
        ; Word 0 starts the borrow chain; each following word of a is loaded
        ; before the previous difference is stored back.
        mov r9, QWORD PTR [rcx]
        sub r9, QWORD PTR [rdx]
        mov r8, QWORD PTR [rcx+8]
        mov QWORD PTR [rcx], r9
        sbb r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rcx+16]
        mov QWORD PTR [rcx+8], r8
        sbb r9, QWORD PTR [rdx+16]
        mov r8, QWORD PTR [rcx+24]
        mov QWORD PTR [rcx+16], r9
        sbb r8, QWORD PTR [rdx+24]
        mov r9, QWORD PTR [rcx+32]
        mov QWORD PTR [rcx+24], r8
        sbb r9, QWORD PTR [rdx+32]
        mov r8, QWORD PTR [rcx+40]
        mov QWORD PTR [rcx+32], r9
        sbb r8, QWORD PTR [rdx+40]
        mov r9, QWORD PTR [rcx+48]
        mov QWORD PTR [rcx+40], r8
        sbb r9, QWORD PTR [rdx+48]
        mov r8, QWORD PTR [rcx+56]
        mov QWORD PTR [rcx+48], r9
        sbb r8, QWORD PTR [rdx+56]
        mov r9, QWORD PTR [rcx+64]
        mov QWORD PTR [rcx+56], r8
        sbb r9, QWORD PTR [rdx+64]
        mov r8, QWORD PTR [rcx+72]
        mov QWORD PTR [rcx+64], r9
        sbb r8, QWORD PTR [rdx+72]
        mov r9, QWORD PTR [rcx+80]
        mov QWORD PTR [rcx+72], r8
        sbb r9, QWORD PTR [rdx+80]
        mov r8, QWORD PTR [rcx+88]
        mov QWORD PTR [rcx+80], r9
        sbb r8, QWORD PTR [rdx+88]
        mov r9, QWORD PTR [rcx+96]
        mov QWORD PTR [rcx+88], r8
        sbb r9, QWORD PTR [rdx+96]
        mov r8, QWORD PTR [rcx+104]
        mov QWORD PTR [rcx+96], r9
        sbb r8, QWORD PTR [rdx+104]
        mov r9, QWORD PTR [rcx+112]
        mov QWORD PTR [rcx+104], r8
        sbb r9, QWORD PTR [rdx+112]
        mov r8, QWORD PTR [rcx+120]
        mov QWORD PTR [rcx+112], r9
        sbb r8, QWORD PTR [rdx+120]
        mov r9, QWORD PTR [rcx+128]
        mov QWORD PTR [rcx+120], r8
        sbb r9, QWORD PTR [rdx+128]
        mov r8, QWORD PTR [rcx+136]
        mov QWORD PTR [rcx+128], r9
        sbb r8, QWORD PTR [rdx+136]
        mov r9, QWORD PTR [rcx+144]
        mov QWORD PTR [rcx+136], r8
        sbb r9, QWORD PTR [rdx+144]
        mov r8, QWORD PTR [rcx+152]
        mov QWORD PTR [rcx+144], r9
        sbb r8, QWORD PTR [rdx+152]
        mov r9, QWORD PTR [rcx+160]
        mov QWORD PTR [rcx+152], r8
        sbb r9, QWORD PTR [rdx+160]
        mov r8, QWORD PTR [rcx+168]
        mov QWORD PTR [rcx+160], r9
        sbb r8, QWORD PTR [rdx+168]
        mov r9, QWORD PTR [rcx+176]
        mov QWORD PTR [rcx+168], r8
        sbb r9, QWORD PTR [rdx+176]
        mov r8, QWORD PTR [rcx+184]
        mov QWORD PTR [rcx+176], r9
        sbb r8, QWORD PTR [rdx+184]
        mov QWORD PTR [rcx+184], r8
        ; rax = 0 - borrow: 0 without a final borrow, -1 with one.
        sbb rax, rax
        ret
sp_3072_sub_in_place_24 ENDP
_text ENDS
; /* 24-word addition with carry out. (r = a + b)
; *
; * r  Destination, 24 64-bit words. (rcx)
; * a  First addend, 24 64-bit words. (rdx)
; * b  Second addend, 24 64-bit words. (r8)
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; * Scratch registers: rax, r9, r10.
; */
_text SEGMENT READONLY PARA
sp_3072_add_24 PROC
        ; rax = 0 up front so the final adc leaves just the carry in it.
        xor eax, eax
        ; Word 0 starts the carry chain; each following word of a is loaded
        ; before the previous sum is stored, then b is added with carry.
        mov r10, QWORD PTR [rdx]
        add r10, QWORD PTR [r8]
        mov r9, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r10
        adc r9, QWORD PTR [r8+8]
        mov r10, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r9
        adc r10, QWORD PTR [r8+16]
        mov r9, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r10
        adc r9, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+32]
        mov QWORD PTR [rcx+24], r9
        adc r10, QWORD PTR [r8+32]
        mov r9, QWORD PTR [rdx+40]
        mov QWORD PTR [rcx+32], r10
        adc r9, QWORD PTR [r8+40]
        mov r10, QWORD PTR [rdx+48]
        mov QWORD PTR [rcx+40], r9
        adc r10, QWORD PTR [r8+48]
        mov r9, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+48], r10
        adc r9, QWORD PTR [r8+56]
        mov r10, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+56], r9
        adc r10, QWORD PTR [r8+64]
        mov r9, QWORD PTR [rdx+72]
        mov QWORD PTR [rcx+64], r10
        adc r9, QWORD PTR [r8+72]
        mov r10, QWORD PTR [rdx+80]
        mov QWORD PTR [rcx+72], r9
        adc r10, QWORD PTR [r8+80]
        mov r9, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+80], r10
        adc r9, QWORD PTR [r8+88]
        mov r10, QWORD PTR [rdx+96]
        mov QWORD PTR [rcx+88], r9
        adc r10, QWORD PTR [r8+96]
        mov r9, QWORD PTR [rdx+104]
        mov QWORD PTR [rcx+96], r10
        adc r9, QWORD PTR [r8+104]
        mov r10, QWORD PTR [rdx+112]
        mov QWORD PTR [rcx+104], r9
        adc r10, QWORD PTR [r8+112]
        mov r9, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+112], r10
        adc r9, QWORD PTR [r8+120]
        mov r10, QWORD PTR [rdx+128]
        mov QWORD PTR [rcx+120], r9
        adc r10, QWORD PTR [r8+128]
        mov r9, QWORD PTR [rdx+136]
        mov QWORD PTR [rcx+128], r10
        adc r9, QWORD PTR [r8+136]
        mov r10, QWORD PTR [rdx+144]
        mov QWORD PTR [rcx+136], r9
        adc r10, QWORD PTR [r8+144]
        mov r9, QWORD PTR [rdx+152]
        mov QWORD PTR [rcx+144], r10
        adc r9, QWORD PTR [r8+152]
        mov r10, QWORD PTR [rdx+160]
        mov QWORD PTR [rcx+152], r9
        adc r10, QWORD PTR [r8+160]
        mov r9, QWORD PTR [rdx+168]
        mov QWORD PTR [rcx+160], r10
        adc r9, QWORD PTR [r8+168]
        mov r10, QWORD PTR [rdx+176]
        mov QWORD PTR [rcx+168], r9
        adc r10, QWORD PTR [r8+176]
        mov r9, QWORD PTR [rdx+184]
        mov QWORD PTR [rcx+176], r10
        adc r9, QWORD PTR [r8+184]
        mov QWORD PTR [rcx+184], r9
        ; rax = carry out of word 23.
        adc rax, 0
        ret
sp_3072_add_24 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_24 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        sub rsp, 616
        mov QWORD PTR [rsp+576], rcx
        mov QWORD PTR [rsp+584], rdx
        mov QWORD PTR [rsp+592], r8
        lea r12, QWORD PTR [rsp+384]
        lea r14, QWORD PTR [rdx+96]
        ; Add
        mov rax, QWORD PTR [rdx]
        xor r15, r15
        add rax, QWORD PTR [r14]
        mov r9, QWORD PTR [rdx+8]
        mov QWORD PTR [r12], rax
        adc r9, QWORD PTR [r14+8]
        mov r10, QWORD PTR [rdx+16]
        mov QWORD PTR [r12+8], r9
        adc r10, QWORD PTR [r14+16]
        mov rax, QWORD PTR [rdx+24]
        mov QWORD PTR [r12+16], r10
        adc rax, QWORD PTR [r14+24]
        mov r9, QWORD PTR [rdx+32]
        mov QWORD PTR [r12+24], rax
        adc r9, QWORD PTR [r14+32]
        mov r10, QWORD PTR [rdx+40]
        mov QWORD PTR [r12+32], r9
        adc r10, QWORD PTR [r14+40]
        mov rax, QWORD PTR [rdx+48]
        mov QWORD PTR [r12+40], r10
        adc rax, QWORD PTR [r14+48]
        mov r9, QWORD PTR [rdx+56]
        mov QWORD PTR [r12+48], rax
        adc r9, QWORD PTR [r14+56]
        mov r10, QWORD PTR [rdx+64]
        mov QWORD PTR [r12+56], r9
        adc r10, QWORD PTR [r14+64]
        mov rax, QWORD PTR [rdx+72]
        mov QWORD PTR [r12+64], r10
        adc rax, QWORD PTR [r14+72]
        mov r9, QWORD PTR [rdx+80]
        mov QWORD PTR [r12+72], rax
        adc r9, QWORD PTR [r14+80]
        mov r10, QWORD PTR [rdx+88]
        mov QWORD PTR [r12+80], r9
        adc r10, QWORD PTR [r14+88]
        mov QWORD PTR [r12+88], r10
        adc r15, 0
        mov QWORD PTR [rsp+600], r15
        lea r13, QWORD PTR [rsp+480]
        lea r14, QWORD PTR
[r8+96] ; Add mov rax, QWORD PTR [r8] xor rdi, rdi add rax, QWORD PTR [r14] mov r9, QWORD PTR [r8+8] mov QWORD PTR [r13], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [r8+16] mov QWORD PTR [r13+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [r8+24] mov QWORD PTR [r13+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [r8+32] mov QWORD PTR [r13+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [r8+40] mov QWORD PTR [r13+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [r8+48] mov QWORD PTR [r13+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [r8+56] mov QWORD PTR [r13+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [r8+64] mov QWORD PTR [r13+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [r8+72] mov QWORD PTR [r13+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [r8+80] mov QWORD PTR [r13+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [r8+88] mov QWORD PTR [r13+80], r9 adc r10, QWORD PTR [r14+88] mov QWORD PTR [r13+88], r10 adc rdi, 0 mov QWORD PTR [rsp+608], rdi mov r8, r13 mov rdx, r12 mov rcx, rsp call sp_3072_mul_12 mov r8, QWORD PTR [rsp+592] mov rdx, QWORD PTR [rsp+584] lea rcx, QWORD PTR [rsp+192] add r8, 96 add rdx, 96 call sp_3072_mul_12 mov r8, QWORD PTR [rsp+592] mov rdx, QWORD PTR [rsp+584] mov rcx, QWORD PTR [rsp+576] call sp_3072_mul_12 IFDEF _WIN64 mov r8, QWORD PTR [rsp+592] mov rdx, QWORD PTR [rsp+584] mov rcx, QWORD PTR [rsp+576] ENDIF mov r15, QWORD PTR [rsp+600] mov rdi, QWORD PTR [rsp+608] mov rsi, QWORD PTR [rsp+576] mov r11, r15 lea r12, QWORD PTR [rsp+384] lea r13, QWORD PTR [rsp+480] and r11, rdi neg r15 neg rdi add rsi, 192 mov rax, QWORD PTR [r12] mov r9, QWORD PTR [r13] and rax, rdi and r9, r15 mov QWORD PTR [r12], rax mov QWORD PTR [r13], r9 mov rax, QWORD PTR [r12+8] mov r9, QWORD PTR [r13+8] and rax, rdi and r9, r15 mov QWORD PTR [r12+8], rax mov QWORD PTR [r13+8], r9 mov rax, QWORD PTR [r12+16] mov r9, QWORD PTR [r13+16] and rax, rdi and r9, r15 mov QWORD PTR [r12+16], rax 
mov QWORD PTR [r13+16], r9 mov rax, QWORD PTR [r12+24] mov r9, QWORD PTR [r13+24] and rax, rdi and r9, r15 mov QWORD PTR [r12+24], rax mov QWORD PTR [r13+24], r9 mov rax, QWORD PTR [r12+32] mov r9, QWORD PTR [r13+32] and rax, rdi and r9, r15 mov QWORD PTR [r12+32], rax mov QWORD PTR [r13+32], r9 mov rax, QWORD PTR [r12+40] mov r9, QWORD PTR [r13+40] and rax, rdi and r9, r15 mov QWORD PTR [r12+40], rax mov QWORD PTR [r13+40], r9 mov rax, QWORD PTR [r12+48] mov r9, QWORD PTR [r13+48] and rax, rdi and r9, r15 mov QWORD PTR [r12+48], rax mov QWORD PTR [r13+48], r9 mov rax, QWORD PTR [r12+56] mov r9, QWORD PTR [r13+56] and rax, rdi and r9, r15 mov QWORD PTR [r12+56], rax mov QWORD PTR [r13+56], r9 mov rax, QWORD PTR [r12+64] mov r9, QWORD PTR [r13+64] and rax, rdi and r9, r15 mov QWORD PTR [r12+64], rax mov QWORD PTR [r13+64], r9 mov rax, QWORD PTR [r12+72] mov r9, QWORD PTR [r13+72] and rax, rdi and r9, r15 mov QWORD PTR [r12+72], rax mov QWORD PTR [r13+72], r9 mov rax, QWORD PTR [r12+80] mov r9, QWORD PTR [r13+80] and rax, rdi and r9, r15 mov QWORD PTR [r12+80], rax mov QWORD PTR [r13+80], r9 mov rax, QWORD PTR [r12+88] mov r9, QWORD PTR [r13+88] and rax, rdi and r9, r15 mov QWORD PTR [r12+88], rax mov QWORD PTR [r13+88], r9 mov rax, QWORD PTR [r12] add rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR 
[rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov QWORD PTR [rsi+88], r10 adc r11, 0 lea r13, QWORD PTR [rsp+192] mov r12, rsp mov rax, QWORD PTR [r12] sub rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [r13+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [r13+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [r13+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [r13+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [r13+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [r13+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [r13+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [r13+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [r13+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD 
PTR [r13+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [r13+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [r13+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [r13+184] mov QWORD PTR [r12+184], r10 sbb r11, 0 mov rax, QWORD PTR [r12] sub rax, QWORD PTR [rcx] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [rcx+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [rcx+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [rcx+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [rcx+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [rcx+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [rcx+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [rcx+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [rcx+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [rcx+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [rcx+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [rcx+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [rcx+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [rcx+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [rcx+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [rcx+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [rcx+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [rcx+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR 
[r12+152], r9 sbb r10, QWORD PTR [rcx+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [rcx+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [rcx+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [rcx+184] mov QWORD PTR [r12+184], r10 sbb r11, 0 sub rsi, 96 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r12] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r12+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r12+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r12+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r12+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r12+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r12+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r12+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r12+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r12+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r12+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r12+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r12+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r12+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r12+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r12+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r12+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r12+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r12+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r12+152] 
mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r12+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r12+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r12+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r12+184] mov QWORD PTR [rsi+184], r10 adc r11, 0 mov QWORD PTR [rcx+288], r11 add rsi, 96 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r13] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r13+96] mov QWORD PTR [rsi+96], rax ; Add to zero mov rax, QWORD PTR [r13+104] adc rax, 0 mov r9, QWORD PTR [r13+112] mov QWORD PTR [rsi+104], rax adc r9, 0 mov r10, QWORD PTR [r13+120] mov QWORD PTR [rsi+112], r9 adc r10, 0 mov rax, QWORD PTR [r13+128] mov QWORD PTR [rsi+120], r10 adc rax, 0 mov r9, QWORD PTR [r13+136] mov QWORD PTR [rsi+128], rax adc r9, 0 mov r10, QWORD PTR [r13+144] mov QWORD PTR [rsi+136], r9 adc r10, 0 mov rax, QWORD PTR [r13+152] mov QWORD PTR [rsi+144], r10 adc rax, 0 mov r9, QWORD PTR [r13+160] mov QWORD 
PTR [rsi+152], rax adc r9, 0 mov r10, QWORD PTR [r13+168] mov QWORD PTR [rsi+160], r9 adc r10, 0 mov rax, QWORD PTR [r13+176] mov QWORD PTR [rsi+168], r10 adc rax, 0 mov r9, QWORD PTR [r13+184] mov QWORD PTR [rsi+176], rax adc r9, 0 mov QWORD PTR [rsi+184], r9 add rsp, 616 pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_3072_mul_24 ENDP _text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision integer.
; *
; * One Karatsuba step: each operand is read as a low half (12 quadwords at
; * offset 0) and a high half (12 quadwords at offset 96), and three calls to
; * sp_3072_mul_avx2_12 are combined into 48 result quadwords.
; * sp_3072_mul_avx2_12 is defined elsewhere; it is presumably a 12-limb
; * r = a * b taking (rcx, rdx, r8) - confirm against its definition.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; *
; * Stack frame (616 bytes):
; *   [rsp+0   .. rsp+191]  product of the two half sums     (call 1 output)
; *   [rsp+192 .. rsp+383]  product of the two high halves   (call 2 output)
; *   [rsp+384 .. rsp+479]  a_lo + a_hi, low 12 quadwords
; *   [rsp+480 .. rsp+575]  b_lo + b_hi, low 12 quadwords
; *   [rsp+576] r   [rsp+584] a   [rsp+592] b                (saved arguments)
; *   [rsp+600] carry out of a_lo + a_hi   [rsp+608] carry out of b_lo + b_hi
; *
; * Six pushes (48) + 616 + the return address (8) = 672 bytes, a multiple
; * of 16, so rsp keeps the caller's 16-byte alignment at each call below.
; *
; * NOTE(review): no separate 32-byte home/shadow area is reserved; the first
; * two calls pass an output buffer that starts exactly at rsp / rsp+192.
; * That is only safe if sp_3072_mul_avx2_12 never writes its home space -
; * confirm.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_24 PROC
        ; Save the registers this routine modifies that the Microsoft x64
        ; convention treats as callee-saved.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        sub     rsp, 616
        ; Keep r, a, b in the frame; rcx/rdx/r8 are overwritten for the calls.
        mov     QWORD PTR [rsp+576], rcx
        mov     QWORD PTR [rsp+584], rdx
        mov     QWORD PTR [rsp+592], r8
        ; [rsp+384] = a_lo + a_hi (12 limbs); r15 = carry out (0 or 1).
        lea     r12, QWORD PTR [rsp+384]
        lea     r14, QWORD PTR [rdx+96]
        ; Add
        mov     rax, QWORD PTR [rdx]
        ; r15 is cleared before the first add so the flags set by xor are
        ; not live in the carry chain.
        xor     r15, r15
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [r12], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [r12+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [rdx+24]
        mov     QWORD PTR [r12+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [r12+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [r12+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r12+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [r12+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [r12+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [rdx+72]
        mov     QWORD PTR [r12+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [r12+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [r12+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     QWORD PTR [r12+88], r10
        adc     r15, 0
        mov     QWORD PTR [rsp+600], r15
        ; [rsp+480] = b_lo + b_hi (12 limbs); rdi = carry out (0 or 1).
        lea     r13, QWORD PTR [rsp+480]
        lea     r14, QWORD PTR [r8+96]
        ; Add
        mov     rax, QWORD PTR [r8]
        xor     rdi, rdi
        add     rax, QWORD PTR [r14]
        mov     r9, QWORD PTR [r8+8]
        mov     QWORD PTR [r13], rax
        adc     r9, QWORD PTR [r14+8]
        mov     r10, QWORD PTR [r8+16]
        mov     QWORD PTR [r13+8], r9
        adc     r10, QWORD PTR [r14+16]
        mov     rax, QWORD PTR [r8+24]
        mov     QWORD PTR [r13+16], r10
        adc     rax, QWORD PTR [r14+24]
        mov     r9, QWORD PTR [r8+32]
        mov     QWORD PTR [r13+24], rax
        adc     r9, QWORD PTR [r14+32]
        mov     r10, QWORD PTR [r8+40]
        mov     QWORD PTR [r13+32], r9
        adc     r10, QWORD PTR [r14+40]
        mov     rax, QWORD PTR [r8+48]
        mov     QWORD PTR [r13+40], r10
        adc     rax, QWORD PTR [r14+48]
        mov     r9, QWORD PTR [r8+56]
        mov     QWORD PTR [r13+48], rax
        adc     r9, QWORD PTR [r14+56]
        mov     r10, QWORD PTR [r8+64]
        mov     QWORD PTR [r13+56], r9
        adc     r10, QWORD PTR [r14+64]
        mov     rax, QWORD PTR [r8+72]
        mov     QWORD PTR [r13+64], r10
        adc     rax, QWORD PTR [r14+72]
        mov     r9, QWORD PTR [r8+80]
        mov     QWORD PTR [r13+72], rax
        adc     r9, QWORD PTR [r14+80]
        mov     r10, QWORD PTR [r8+88]
        mov     QWORD PTR [r13+80], r9
        adc     r10, QWORD PTR [r14+88]
        mov     QWORD PTR [r13+88], r10
        adc     rdi, 0
        mov     QWORD PTR [rsp+608], rdi
        ; Call 1: (rcx, rdx, r8) = (rsp, a half sum, b half sum).
        mov     r8, r13
        mov     rdx, r12
        mov     rcx, rsp
        call    sp_3072_mul_avx2_12
        ; Call 2: (rcx, rdx, r8) = (rsp+192, a+96, b+96), the high halves.
        mov     r8, QWORD PTR [rsp+592]
        mov     rdx, QWORD PTR [rsp+584]
        lea     rcx, QWORD PTR [rsp+192]
        add     r8, 96
        add     rdx, 96
        call    sp_3072_mul_avx2_12
        ; Call 3: (rcx, rdx, r8) = (r, a, b), the low halves, output at r.
        mov     r8, QWORD PTR [rsp+592]
        mov     rdx, QWORD PTR [rsp+584]
        mov     rcx, QWORD PTR [rsp+576]
        call    sp_3072_mul_avx2_12
IFDEF _WIN64
        ; rcx/rdx/r8 are volatile across a call in the Microsoft x64
        ; convention, so reload them (rcx is used as r further down).
        mov     r8, QWORD PTR [rsp+592]
        mov     rdx, QWORD PTR [rsp+584]
        mov     rcx, QWORD PTR [rsp+576]
ENDIF
        ; r15 = carry of the a half sum, rdi = carry of the b half sum.
        ; r11 = r15 AND rdi and from here on accumulates the signed carry
        ; that is finally stored at r+288.
        ; neg turns each 0/1 carry into a 0 / all-ones mask.
        mov     r15, QWORD PTR [rsp+600]
        mov     rdi, QWORD PTR [rsp+608]
        mov     rsi, QWORD PTR [rsp+576]
        mov     r11, r15
        lea     r12, QWORD PTR [rsp+384]
        lea     r13, QWORD PTR [rsp+480]
        and     r11, rdi
        neg     r15
        neg     rdi
        add     rsi, 192
        ; r[24..35] = (a half sum AND rdi mask) + (b half sum AND r15 mask).
        ; pext with a zero mask yields 0 and with an all-ones mask yields
        ; the source unchanged, so it is used here as a masking select.
        ; pext does not modify flags, which lets it sit inside the
        ; add/adc chain.
        mov     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [r13]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        add     rax, r9
        mov     r9, QWORD PTR [r12+8]
        mov     r10, QWORD PTR [r13+8]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+16]
        mov     rax, QWORD PTR [r13+16]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+8], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [r13+24]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+16], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+32]
        mov     r10, QWORD PTR [r13+32]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+24], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+40]
        mov     rax, QWORD PTR [r13+40]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+32], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [r13+48]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+40], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+56]
        mov     r10, QWORD PTR [r13+56]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+48], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+64]
        mov     rax, QWORD PTR [r13+64]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+56], r9
        adc     r10, rax
        mov     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [r13+72]
        pext    rax, rax, rdi
        pext    r9, r9, r15
        mov     QWORD PTR [rsi+64], r10
        adc     rax, r9
        mov     r9, QWORD PTR [r12+80]
        mov     r10, QWORD PTR [r13+80]
        pext    r9, r9, rdi
        pext    r10, r10, r15
        mov     QWORD PTR [rsi+72], rax
        adc     r9, r10
        mov     r10, QWORD PTR [r12+88]
        mov     rax, QWORD PTR [r13+88]
        pext    r10, r10, rdi
        pext    rax, rax, r15
        mov     QWORD PTR [rsi+80], r9
        adc     r10, rax
        mov     QWORD PTR [rsi+88], r10
        adc     r11, 0
        ; [rsp+0..191] -= [rsp+192..383] (24 limbs); borrow goes into r11.
        lea     r13, QWORD PTR [rsp+192]
        mov     r12, rsp
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [r13+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [r13+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [r13+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [r13+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [r13+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [r13+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [r13+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [r13+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [r13+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [r13+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [r13+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [r13+184]
        mov     QWORD PTR [r12+184], r10
        sbb     r11, 0
        ; [rsp+0..191] -= r[0..23] (the call 3 output at rcx); borrow into r11.
        mov     rax, QWORD PTR [r12]
        sub     rax, QWORD PTR [rcx]
        mov     r9, QWORD PTR [r12+8]
        mov     QWORD PTR [r12], rax
        sbb     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [r12+16]
        mov     QWORD PTR [r12+8], r9
        sbb     r10, QWORD PTR [rcx+16]
        mov     rax, QWORD PTR [r12+24]
        mov     QWORD PTR [r12+16], r10
        sbb     rax, QWORD PTR [rcx+24]
        mov     r9, QWORD PTR [r12+32]
        mov     QWORD PTR [r12+24], rax
        sbb     r9, QWORD PTR [rcx+32]
        mov     r10, QWORD PTR [r12+40]
        mov     QWORD PTR [r12+32], r9
        sbb     r10, QWORD PTR [rcx+40]
        mov     rax, QWORD PTR [r12+48]
        mov     QWORD PTR [r12+40], r10
        sbb     rax, QWORD PTR [rcx+48]
        mov     r9, QWORD PTR [r12+56]
        mov     QWORD PTR [r12+48], rax
        sbb     r9, QWORD PTR [rcx+56]
        mov     r10, QWORD PTR [r12+64]
        mov     QWORD PTR [r12+56], r9
        sbb     r10, QWORD PTR [rcx+64]
        mov     rax, QWORD PTR [r12+72]
        mov     QWORD PTR [r12+64], r10
        sbb     rax, QWORD PTR [rcx+72]
        mov     r9, QWORD PTR [r12+80]
        mov     QWORD PTR [r12+72], rax
        sbb     r9, QWORD PTR [rcx+80]
        mov     r10, QWORD PTR [r12+88]
        mov     QWORD PTR [r12+80], r9
        sbb     r10, QWORD PTR [rcx+88]
        mov     rax, QWORD PTR [r12+96]
        mov     QWORD PTR [r12+88], r10
        sbb     rax, QWORD PTR [rcx+96]
        mov     r9, QWORD PTR [r12+104]
        mov     QWORD PTR [r12+96], rax
        sbb     r9, QWORD PTR [rcx+104]
        mov     r10, QWORD PTR [r12+112]
        mov     QWORD PTR [r12+104], r9
        sbb     r10, QWORD PTR [rcx+112]
        mov     rax, QWORD PTR [r12+120]
        mov     QWORD PTR [r12+112], r10
        sbb     rax, QWORD PTR [rcx+120]
        mov     r9, QWORD PTR [r12+128]
        mov     QWORD PTR [r12+120], rax
        sbb     r9, QWORD PTR [rcx+128]
        mov     r10, QWORD PTR [r12+136]
        mov     QWORD PTR [r12+128], r9
        sbb     r10, QWORD PTR [rcx+136]
        mov     rax, QWORD PTR [r12+144]
        mov     QWORD PTR [r12+136], r10
        sbb     rax, QWORD PTR [rcx+144]
        mov     r9, QWORD PTR [r12+152]
        mov     QWORD PTR [r12+144], rax
        sbb     r9, QWORD PTR [rcx+152]
        mov     r10, QWORD PTR [r12+160]
        mov     QWORD PTR [r12+152], r9
        sbb     r10, QWORD PTR [rcx+160]
        mov     rax, QWORD PTR [r12+168]
        mov     QWORD PTR [r12+160], r10
        sbb     rax, QWORD PTR [rcx+168]
        mov     r9, QWORD PTR [r12+176]
        mov     QWORD PTR [r12+168], rax
        sbb     r9, QWORD PTR [rcx+176]
        mov     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [r12+176], r9
        sbb     r10, QWORD PTR [rcx+184]
        mov     QWORD PTR [r12+184], r10
        sbb     r11, 0
        ; rsi = r + 96: r[12..35] += [rsp+0..191] (24 limbs), carry into r11.
        sub     rsi, 96
        ; Add
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r12]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r12+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r12+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r12+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r12+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r12+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r12+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r12+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r12+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r12+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r12+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r12+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r12+96]
        mov     r9, QWORD PTR [rsi+104]
        mov     QWORD PTR [rsi+96], rax
        adc     r9, QWORD PTR [r12+104]
        mov     r10, QWORD PTR [rsi+112]
        mov     QWORD PTR [rsi+104], r9
        adc     r10, QWORD PTR [r12+112]
        mov     rax, QWORD PTR [rsi+120]
        mov     QWORD PTR [rsi+112], r10
        adc     rax, QWORD PTR [r12+120]
        mov     r9, QWORD PTR [rsi+128]
        mov     QWORD PTR [rsi+120], rax
        adc     r9, QWORD PTR [r12+128]
        mov     r10, QWORD PTR [rsi+136]
        mov     QWORD PTR [rsi+128], r9
        adc     r10, QWORD PTR [r12+136]
        mov     rax, QWORD PTR [rsi+144]
        mov     QWORD PTR [rsi+136], r10
        adc     rax, QWORD PTR [r12+144]
        mov     r9, QWORD PTR [rsi+152]
        mov     QWORD PTR [rsi+144], rax
        adc     r9, QWORD PTR [r12+152]
        mov     r10, QWORD PTR [rsi+160]
        mov     QWORD PTR [rsi+152], r9
        adc     r10, QWORD PTR [r12+160]
        mov     rax, QWORD PTR [rsi+168]
        mov     QWORD PTR [rsi+160], r10
        adc     rax, QWORD PTR [r12+168]
        mov     r9, QWORD PTR [rsi+176]
        mov     QWORD PTR [rsi+168], rax
        adc     r9, QWORD PTR [r12+176]
        mov     r10, QWORD PTR [rsi+184]
        mov     QWORD PTR [rsi+176], r9
        adc     r10, QWORD PTR [r12+184]
        mov     QWORD PTR [rsi+184], r10
        adc     r11, 0
        ; Park the accumulated carry in r[36] (r + 288); the next chain
        ; reads it back as [rsi+96] once rsi = r + 192.
        mov     QWORD PTR [rcx+288], r11
        add     rsi, 96
        ; r[24..36] += first 13 limbs of the high-half product at rsp+192.
        ; Add
        mov     rax, QWORD PTR [rsi]
        add     rax, QWORD PTR [r13]
        mov     r9, QWORD PTR [rsi+8]
        mov     QWORD PTR [rsi], rax
        adc     r9, QWORD PTR [r13+8]
        mov     r10, QWORD PTR [rsi+16]
        mov     QWORD PTR [rsi+8], r9
        adc     r10, QWORD PTR [r13+16]
        mov     rax, QWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], r10
        adc     rax, QWORD PTR [r13+24]
        mov     r9, QWORD PTR [rsi+32]
        mov     QWORD PTR [rsi+24], rax
        adc     r9, QWORD PTR [r13+32]
        mov     r10, QWORD PTR [rsi+40]
        mov     QWORD PTR [rsi+32], r9
        adc     r10, QWORD PTR [r13+40]
        mov     rax, QWORD PTR [rsi+48]
        mov     QWORD PTR [rsi+40], r10
        adc     rax, QWORD PTR [r13+48]
        mov     r9, QWORD PTR [rsi+56]
        mov     QWORD PTR [rsi+48], rax
        adc     r9, QWORD PTR [r13+56]
        mov     r10, QWORD PTR [rsi+64]
        mov     QWORD PTR [rsi+56], r9
        adc     r10, QWORD PTR [r13+64]
        mov     rax, QWORD PTR [rsi+72]
        mov     QWORD PTR [rsi+64], r10
        adc     rax, QWORD PTR [r13+72]
        mov     r9, QWORD PTR [rsi+80]
        mov     QWORD PTR [rsi+72], rax
        adc     r9, QWORD PTR [r13+80]
        mov     r10, QWORD PTR [rsi+88]
        mov     QWORD PTR [rsi+80], r9
        adc     r10, QWORD PTR [r13+88]
        mov     rax, QWORD PTR [rsi+96]
        mov     QWORD PTR [rsi+88], r10
        adc     rax, QWORD PTR [r13+96]
        mov     QWORD PTR [rsi+96], rax
        ; r[37..47] are only written here, never read: each is the matching
        ; limb of the high-half product plus the carry still in flight.
        ; Add to zero
        mov     rax, QWORD PTR [r13+104]
        adc     rax, 0
        mov     r9, QWORD PTR [r13+112]
        mov     QWORD PTR [rsi+104], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+120]
        mov     QWORD PTR [rsi+112], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+128]
        mov     QWORD PTR [rsi+120], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+136]
        mov     QWORD PTR [rsi+128], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+144]
        mov     QWORD PTR [rsi+136], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+152]
        mov     QWORD PTR [rsi+144], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+160]
        mov     QWORD PTR [rsi+152], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+168]
        mov     QWORD PTR [rsi+160], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+176]
        mov     QWORD PTR [rsi+168], r10
        adc     rax, 0
        mov     r9, QWORD PTR [r13+184]
        mov     QWORD PTR [rsi+176], rax
        adc     r9, 0
        mov     QWORD PTR [rsi+184], r9
        ; Release the frame and restore saved registers in reverse push order.
        add     rsp, 616
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mul_avx2_24 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result.
; * b A single precision integer.
; */
; Register use for sp_3072_sub_in_place_48:
;   rcx      = a, 48 quadword limbs, overwritten with a - b
;   rdx      = b, 48 quadword limbs, only read
;   r8, r9   = scratch, alternating limb by limb
;   rax      = result: 0 if no borrow left the top limb, all ones otherwise
;
; The subtraction is one sub followed by 47 sbb steps. Each step loads the
; next limb of a before storing the previous difference; mov does not touch
; the flags, so the borrow survives from one sbb to the next. The REPEAT
; block is expanded at assembly time into straight-line code, two limbs per
; pass.
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_48 PROC
        ; Limb 0 starts the borrow chain.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
sp_3072_sub_in_place_48_disp = 8
REPEAT 23
        ; Odd limb goes through r9, even limb through r8 (limbs 1..46).
        mov     r9, QWORD PTR [rcx+sp_3072_sub_in_place_48_disp]
        mov     QWORD PTR [rcx+sp_3072_sub_in_place_48_disp-8], r8
        sbb     r9, QWORD PTR [rdx+sp_3072_sub_in_place_48_disp]
        mov     r8, QWORD PTR [rcx+sp_3072_sub_in_place_48_disp+8]
        mov     QWORD PTR [rcx+sp_3072_sub_in_place_48_disp], r9
        sbb     r8, QWORD PTR [rdx+sp_3072_sub_in_place_48_disp+8]
sp_3072_sub_in_place_48_disp = sp_3072_sub_in_place_48_disp + 16
ENDM
        ; Limb 47 closes the chain.
        mov     r9, QWORD PTR [rcx+376]
        mov     QWORD PTR [rcx+368], r8
        sbb     r9, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+376], r9
        ; rax - rax - CF: turns the final borrow into 0 or all ones.
        sbb     rax, rax
        ret
sp_3072_sub_in_place_48 ENDP
_text ENDS
; /* Add b to a into r. (r = a + b)
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision integer.
; *
; * Register use:
; *   rcx      = r, receives 48 quadword limbs
; *   rdx      = a, 48 quadword limbs, only read
; *   r8       = b, 48 quadword limbs, only read
; *   r9, r10  = scratch, alternating limb by limb
; *   rax      = result: carry out of the top limb (0 or 1)
; *
; * One add followed by 47 adc steps; the REPEAT block is expanded at
; * assembly time into straight-line code, two limbs per pass.
; */
_text SEGMENT READONLY PARA
sp_3072_add_48 PROC
        ; Limb 0. rax is cleared before the add because xor rewrites the
        ; flags; after this point only mov and adc run until the final adc.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
sp_3072_add_48_disp = 8
REPEAT 23
        ; Odd limb goes through r10, even limb through r9 (limbs 1..46).
        mov     r10, QWORD PTR [rdx+sp_3072_add_48_disp]
        mov     QWORD PTR [rcx+sp_3072_add_48_disp-8], r9
        adc     r10, QWORD PTR [r8+sp_3072_add_48_disp]
        mov     r9, QWORD PTR [rdx+sp_3072_add_48_disp+8]
        mov     QWORD PTR [rcx+sp_3072_add_48_disp], r10
        adc     r9, QWORD PTR [r8+sp_3072_add_48_disp+8]
sp_3072_add_48_disp = sp_3072_add_48_disp + 16
ENDM
        ; Limb 47 closes the chain.
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        adc     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; Fold the final carry into the zeroed return register.
        adc     rax, 0
        ret
sp_3072_add_48 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ _text SEGMENT READONLY PARA sp_3072_mul_48 PROC push r12 push r13 push r14 push r15 push rdi push rsi sub rsp, 1192 mov QWORD PTR [rsp+1152], rcx mov QWORD PTR [rsp+1160], rdx mov QWORD PTR [rsp+1168], r8 lea r12, QWORD PTR [rsp+768] lea r14, QWORD PTR [rdx+192] ; Add mov rax, QWORD PTR [rdx] xor r15, r15 add rax, QWORD PTR [r14] mov r9, QWORD PTR [rdx+8] mov QWORD PTR [r12], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [rdx+16] mov QWORD PTR [r12+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [rdx+24] mov QWORD PTR [r12+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [rdx+32] mov QWORD PTR [r12+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [rdx+40] mov QWORD PTR [r12+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [rdx+48] mov QWORD PTR [r12+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [rdx+56] mov QWORD PTR [r12+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [rdx+64] mov QWORD PTR [r12+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [rdx+72] mov QWORD PTR [r12+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [rdx+80] mov QWORD PTR [r12+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [rdx+88] mov QWORD PTR [r12+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [rdx+96] mov QWORD PTR [r12+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [rdx+104] mov QWORD PTR [r12+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [rdx+112] mov QWORD PTR [r12+104], r9 adc r10,
QWORD PTR [r14+112] mov rax, QWORD PTR [rdx+120] mov QWORD PTR [r12+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [rdx+128] mov QWORD PTR [r12+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [rdx+136] mov QWORD PTR [r12+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [rdx+144] mov QWORD PTR [r12+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [rdx+152] mov QWORD PTR [r12+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [rdx+160] mov QWORD PTR [r12+152], r9 adc r10, QWORD PTR [r14+160] mov rax, QWORD PTR [rdx+168] mov QWORD PTR [r12+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [rdx+176] mov QWORD PTR [r12+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [rdx+184] mov QWORD PTR [r12+176], r9 adc r10, QWORD PTR [r14+184] mov QWORD PTR [r12+184], r10 adc r15, 0 mov QWORD PTR [rsp+1176], r15 lea r13, QWORD PTR [rsp+960] lea r14, QWORD PTR [r8+192] ; Add mov rax, QWORD PTR [r8] xor rdi, rdi add rax, QWORD PTR [r14] mov r9, QWORD PTR [r8+8] mov QWORD PTR [r13], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [r8+16] mov QWORD PTR [r13+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [r8+24] mov QWORD PTR [r13+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [r8+32] mov QWORD PTR [r13+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [r8+40] mov QWORD PTR [r13+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [r8+48] mov QWORD PTR [r13+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [r8+56] mov QWORD PTR [r13+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [r8+64] mov QWORD PTR [r13+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [r8+72] mov QWORD PTR [r13+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [r8+80] mov QWORD PTR [r13+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [r8+88] mov QWORD PTR [r13+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [r8+96] mov QWORD PTR [r13+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [r8+104] mov 
QWORD PTR [r13+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [r8+112] mov QWORD PTR [r13+104], r9 adc r10, QWORD PTR [r14+112] mov rax, QWORD PTR [r8+120] mov QWORD PTR [r13+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [r8+128] mov QWORD PTR [r13+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [r8+136] mov QWORD PTR [r13+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [r8+144] mov QWORD PTR [r13+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [r8+152] mov QWORD PTR [r13+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [r8+160] mov QWORD PTR [r13+152], r9 adc r10, QWORD PTR [r14+160] mov rax, QWORD PTR [r8+168] mov QWORD PTR [r13+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [r8+176] mov QWORD PTR [r13+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [r8+184] mov QWORD PTR [r13+176], r9 adc r10, QWORD PTR [r14+184] mov QWORD PTR [r13+184], r10 adc rdi, 0 mov QWORD PTR [rsp+1184], rdi mov r8, r13 mov rdx, r12 mov rcx, rsp call sp_3072_mul_24 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] lea rcx, QWORD PTR [rsp+384] add r8, 192 add rdx, 192 call sp_3072_mul_24 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] mov rcx, QWORD PTR [rsp+1152] call sp_3072_mul_24 IFDEF _WIN64 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] mov rcx, QWORD PTR [rsp+1152] ENDIF mov r15, QWORD PTR [rsp+1176] mov rdi, QWORD PTR [rsp+1184] mov rsi, QWORD PTR [rsp+1152] mov r11, r15 lea r12, QWORD PTR [rsp+768] lea r13, QWORD PTR [rsp+960] and r11, rdi neg r15 neg rdi add rsi, 384 mov rax, QWORD PTR [r12] mov r9, QWORD PTR [r13] and rax, rdi and r9, r15 mov QWORD PTR [r12], rax mov QWORD PTR [r13], r9 mov rax, QWORD PTR [r12+8] mov r9, QWORD PTR [r13+8] and rax, rdi and r9, r15 mov QWORD PTR [r12+8], rax mov QWORD PTR [r13+8], r9 mov rax, QWORD PTR [r12+16] mov r9, QWORD PTR [r13+16] and rax, rdi and r9, r15 mov QWORD PTR [r12+16], rax mov QWORD PTR [r13+16], r9 mov rax, QWORD PTR [r12+24] 
mov r9, QWORD PTR [r13+24] and rax, rdi and r9, r15 mov QWORD PTR [r12+24], rax mov QWORD PTR [r13+24], r9 mov rax, QWORD PTR [r12+32] mov r9, QWORD PTR [r13+32] and rax, rdi and r9, r15 mov QWORD PTR [r12+32], rax mov QWORD PTR [r13+32], r9 mov rax, QWORD PTR [r12+40] mov r9, QWORD PTR [r13+40] and rax, rdi and r9, r15 mov QWORD PTR [r12+40], rax mov QWORD PTR [r13+40], r9 mov rax, QWORD PTR [r12+48] mov r9, QWORD PTR [r13+48] and rax, rdi and r9, r15 mov QWORD PTR [r12+48], rax mov QWORD PTR [r13+48], r9 mov rax, QWORD PTR [r12+56] mov r9, QWORD PTR [r13+56] and rax, rdi and r9, r15 mov QWORD PTR [r12+56], rax mov QWORD PTR [r13+56], r9 mov rax, QWORD PTR [r12+64] mov r9, QWORD PTR [r13+64] and rax, rdi and r9, r15 mov QWORD PTR [r12+64], rax mov QWORD PTR [r13+64], r9 mov rax, QWORD PTR [r12+72] mov r9, QWORD PTR [r13+72] and rax, rdi and r9, r15 mov QWORD PTR [r12+72], rax mov QWORD PTR [r13+72], r9 mov rax, QWORD PTR [r12+80] mov r9, QWORD PTR [r13+80] and rax, rdi and r9, r15 mov QWORD PTR [r12+80], rax mov QWORD PTR [r13+80], r9 mov rax, QWORD PTR [r12+88] mov r9, QWORD PTR [r13+88] and rax, rdi and r9, r15 mov QWORD PTR [r12+88], rax mov QWORD PTR [r13+88], r9 mov rax, QWORD PTR [r12+96] mov r9, QWORD PTR [r13+96] and rax, rdi and r9, r15 mov QWORD PTR [r12+96], rax mov QWORD PTR [r13+96], r9 mov rax, QWORD PTR [r12+104] mov r9, QWORD PTR [r13+104] and rax, rdi and r9, r15 mov QWORD PTR [r12+104], rax mov QWORD PTR [r13+104], r9 mov rax, QWORD PTR [r12+112] mov r9, QWORD PTR [r13+112] and rax, rdi and r9, r15 mov QWORD PTR [r12+112], rax mov QWORD PTR [r13+112], r9 mov rax, QWORD PTR [r12+120] mov r9, QWORD PTR [r13+120] and rax, rdi and r9, r15 mov QWORD PTR [r12+120], rax mov QWORD PTR [r13+120], r9 mov rax, QWORD PTR [r12+128] mov r9, QWORD PTR [r13+128] and rax, rdi and r9, r15 mov QWORD PTR [r12+128], rax mov QWORD PTR [r13+128], r9 mov rax, QWORD PTR [r12+136] mov r9, QWORD PTR [r13+136] and rax, rdi and r9, r15 mov QWORD PTR [r12+136], rax mov QWORD 
PTR [r13+136], r9 mov rax, QWORD PTR [r12+144] mov r9, QWORD PTR [r13+144] and rax, rdi and r9, r15 mov QWORD PTR [r12+144], rax mov QWORD PTR [r13+144], r9 mov rax, QWORD PTR [r12+152] mov r9, QWORD PTR [r13+152] and rax, rdi and r9, r15 mov QWORD PTR [r12+152], rax mov QWORD PTR [r13+152], r9 mov rax, QWORD PTR [r12+160] mov r9, QWORD PTR [r13+160] and rax, rdi and r9, r15 mov QWORD PTR [r12+160], rax mov QWORD PTR [r13+160], r9 mov rax, QWORD PTR [r12+168] mov r9, QWORD PTR [r13+168] and rax, rdi and r9, r15 mov QWORD PTR [r12+168], rax mov QWORD PTR [r13+168], r9 mov rax, QWORD PTR [r12+176] mov r9, QWORD PTR [r13+176] and rax, rdi and r9, r15 mov QWORD PTR [r12+176], rax mov QWORD PTR [r13+176], r9 mov rax, QWORD PTR [r12+184] mov r9, QWORD PTR [r13+184] and rax, rdi and r9, r15 mov QWORD PTR [r12+184], rax mov QWORD PTR [r13+184], r9 mov rax, QWORD PTR [r12] add rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r13+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r13+104] mov r10, QWORD PTR 
[r12+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r13+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r13+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r13+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r13+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r13+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r13+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r13+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r13+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r13+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r13+184] mov QWORD PTR [rsi+184], r10 adc r11, 0 lea r13, QWORD PTR [rsp+384] mov r12, rsp mov rax, QWORD PTR [r12] sub rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [r13+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [r13+96] mov r9, QWORD PTR [r12+104] mov 
QWORD PTR [r12+96], rax sbb r9, QWORD PTR [r13+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [r13+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [r13+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [r13+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [r13+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [r13+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [r13+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [r13+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [r13+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [r13+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [r13+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [r13+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [r13+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [r13+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [r13+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [r13+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [r13+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [r13+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [r13+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [r13+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [r13+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [r13+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR [r12+272], r9 sbb r10, QWORD PTR [r13+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], 
r10 sbb rax, QWORD PTR [r13+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [r13+296] mov r10, QWORD PTR [r12+304] mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [r13+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [r13+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [r13+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [r13+328] mov rax, QWORD PTR [r12+336] mov QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [r13+336] mov r9, QWORD PTR [r12+344] mov QWORD PTR [r12+336], rax sbb r9, QWORD PTR [r13+344] mov r10, QWORD PTR [r12+352] mov QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [r13+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [r13+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [r13+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [r13+376] mov QWORD PTR [r12+376], r10 sbb r11, 0 mov rax, QWORD PTR [r12] sub rax, QWORD PTR [rcx] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [rcx+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [rcx+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [rcx+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [rcx+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [rcx+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [rcx+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [rcx+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [rcx+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [rcx+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [rcx+88] mov rax, QWORD PTR 
[r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [rcx+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [rcx+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [rcx+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [rcx+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [rcx+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [rcx+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [rcx+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [rcx+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [rcx+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [rcx+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [rcx+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [rcx+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [rcx+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [rcx+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [rcx+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [rcx+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [rcx+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [rcx+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [rcx+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [rcx+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [rcx+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [rcx+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR 
[r12+272], r9 sbb r10, QWORD PTR [rcx+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], r10 sbb rax, QWORD PTR [rcx+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [rcx+296] mov r10, QWORD PTR [r12+304] mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [rcx+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [rcx+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [rcx+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [rcx+328] mov rax, QWORD PTR [r12+336] mov QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [rcx+336] mov r9, QWORD PTR [r12+344] mov QWORD PTR [r12+336], rax sbb r9, QWORD PTR [rcx+344] mov r10, QWORD PTR [r12+352] mov QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [rcx+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [rcx+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [rcx+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [rcx+376] mov QWORD PTR [r12+376], r10 sbb r11, 0 sub rsi, 192 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r12] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r12+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r12+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r12+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r12+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r12+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r12+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r12+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r12+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r12+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR 
[r12+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r12+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r12+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r12+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r12+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r12+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r12+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r12+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r12+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r12+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r12+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r12+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r12+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r12+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r12+192] mov r9, QWORD PTR [rsi+200] mov QWORD PTR [rsi+192], rax adc r9, QWORD PTR [r12+200] mov r10, QWORD PTR [rsi+208] mov QWORD PTR [rsi+200], r9 adc r10, QWORD PTR [r12+208] mov rax, QWORD PTR [rsi+216] mov QWORD PTR [rsi+208], r10 adc rax, QWORD PTR [r12+216] mov r9, QWORD PTR [rsi+224] mov QWORD PTR [rsi+216], rax adc r9, QWORD PTR [r12+224] mov r10, QWORD PTR [rsi+232] mov QWORD PTR [rsi+224], r9 adc r10, QWORD PTR [r12+232] mov rax, QWORD PTR [rsi+240] mov QWORD PTR [rsi+232], r10 adc rax, QWORD PTR [r12+240] mov r9, QWORD PTR [rsi+248] mov QWORD PTR [rsi+240], rax adc r9, QWORD PTR [r12+248] mov r10, QWORD PTR [rsi+256] mov QWORD PTR [rsi+248], r9 adc r10, QWORD PTR [r12+256] mov rax, QWORD PTR [rsi+264] mov QWORD PTR [rsi+256], r10 adc rax, QWORD PTR [r12+264] mov r9, QWORD PTR 
[rsi+272] mov QWORD PTR [rsi+264], rax adc r9, QWORD PTR [r12+272] mov r10, QWORD PTR [rsi+280] mov QWORD PTR [rsi+272], r9 adc r10, QWORD PTR [r12+280] mov rax, QWORD PTR [rsi+288] mov QWORD PTR [rsi+280], r10 adc rax, QWORD PTR [r12+288] mov r9, QWORD PTR [rsi+296] mov QWORD PTR [rsi+288], rax adc r9, QWORD PTR [r12+296] mov r10, QWORD PTR [rsi+304] mov QWORD PTR [rsi+296], r9 adc r10, QWORD PTR [r12+304] mov rax, QWORD PTR [rsi+312] mov QWORD PTR [rsi+304], r10 adc rax, QWORD PTR [r12+312] mov r9, QWORD PTR [rsi+320] mov QWORD PTR [rsi+312], rax adc r9, QWORD PTR [r12+320] mov r10, QWORD PTR [rsi+328] mov QWORD PTR [rsi+320], r9 adc r10, QWORD PTR [r12+328] mov rax, QWORD PTR [rsi+336] mov QWORD PTR [rsi+328], r10 adc rax, QWORD PTR [r12+336] mov r9, QWORD PTR [rsi+344] mov QWORD PTR [rsi+336], rax adc r9, QWORD PTR [r12+344] mov r10, QWORD PTR [rsi+352] mov QWORD PTR [rsi+344], r9 adc r10, QWORD PTR [r12+352] mov rax, QWORD PTR [rsi+360] mov QWORD PTR [rsi+352], r10 adc rax, QWORD PTR [r12+360] mov r9, QWORD PTR [rsi+368] mov QWORD PTR [rsi+360], rax adc r9, QWORD PTR [r12+368] mov r10, QWORD PTR [rsi+376] mov QWORD PTR [rsi+368], r9 adc r10, QWORD PTR [r12+376] mov QWORD PTR [rsi+376], r10 adc r11, 0 mov QWORD PTR [rcx+576], r11 add rsi, 192 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r13] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR 
[rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r13+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r13+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r13+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r13+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r13+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r13+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r13+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r13+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r13+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r13+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r13+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r13+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r13+192] mov QWORD PTR [rsi+192], rax ; Add to zero mov rax, QWORD PTR [r13+200] adc rax, 0 mov r9, QWORD PTR [r13+208] mov QWORD PTR [rsi+200], rax adc r9, 0 mov r10, QWORD PTR [r13+216] mov QWORD PTR [rsi+208], r9 adc r10, 0 mov rax, QWORD PTR [r13+224] mov QWORD PTR [rsi+216], r10 adc rax, 0 mov r9, QWORD PTR [r13+232] mov QWORD PTR [rsi+224], rax adc r9, 0 mov r10, QWORD PTR [r13+240] mov QWORD PTR [rsi+232], r9 adc r10, 0 mov rax, QWORD PTR [r13+248] mov QWORD PTR [rsi+240], r10 adc rax, 0 mov r9, QWORD PTR [r13+256] mov QWORD PTR [rsi+248], rax adc r9, 0 mov r10, QWORD PTR [r13+264] mov QWORD PTR [rsi+256], r9 adc r10, 0 mov rax, QWORD PTR [r13+272] 
mov QWORD PTR [rsi+264], r10 adc rax, 0 mov r9, QWORD PTR [r13+280] mov QWORD PTR [rsi+272], rax adc r9, 0 mov r10, QWORD PTR [r13+288] mov QWORD PTR [rsi+280], r9 adc r10, 0 mov rax, QWORD PTR [r13+296] mov QWORD PTR [rsi+288], r10 adc rax, 0 mov r9, QWORD PTR [r13+304] mov QWORD PTR [rsi+296], rax adc r9, 0 mov r10, QWORD PTR [r13+312] mov QWORD PTR [rsi+304], r9 adc r10, 0 mov rax, QWORD PTR [r13+320] mov QWORD PTR [rsi+312], r10 adc rax, 0 mov r9, QWORD PTR [r13+328] mov QWORD PTR [rsi+320], rax adc r9, 0 mov r10, QWORD PTR [r13+336] mov QWORD PTR [rsi+328], r9 adc r10, 0 mov rax, QWORD PTR [r13+344] mov QWORD PTR [rsi+336], r10 adc rax, 0 mov r9, QWORD PTR [r13+352] mov QWORD PTR [rsi+344], rax adc r9, 0 mov r10, QWORD PTR [r13+360] mov QWORD PTR [rsi+352], r9 adc r10, 0 mov rax, QWORD PTR [r13+368] mov QWORD PTR [rsi+360], r10 adc rax, 0 mov r9, QWORD PTR [r13+376] mov QWORD PTR [rsi+368], rax adc r9, 0 mov QWORD PTR [rsi+376], r9 add rsp, 1192 pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_3072_mul_48 ENDP _text ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. 
; */ _text SEGMENT READONLY PARA sp_3072_mul_avx2_48 PROC push r12 push r13 push r14 push r15 push rdi push rsi sub rsp, 1192 mov QWORD PTR [rsp+1152], rcx mov QWORD PTR [rsp+1160], rdx mov QWORD PTR [rsp+1168], r8 lea r12, QWORD PTR [rsp+768] lea r14, QWORD PTR [rdx+192] ; Add mov rax, QWORD PTR [rdx] xor r15, r15 add rax, QWORD PTR [r14] mov r9, QWORD PTR [rdx+8] mov QWORD PTR [r12], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [rdx+16] mov QWORD PTR [r12+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [rdx+24] mov QWORD PTR [r12+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [rdx+32] mov QWORD PTR [r12+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [rdx+40] mov QWORD PTR [r12+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [rdx+48] mov QWORD PTR [r12+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [rdx+56] mov QWORD PTR [r12+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [rdx+64] mov QWORD PTR [r12+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [rdx+72] mov QWORD PTR [r12+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [rdx+80] mov QWORD PTR [r12+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [rdx+88] mov QWORD PTR [r12+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [rdx+96] mov QWORD PTR [r12+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [rdx+104] mov QWORD PTR [r12+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [rdx+112] mov QWORD PTR [r12+104], r9 adc r10, QWORD PTR [r14+112] mov rax, QWORD PTR [rdx+120] mov QWORD PTR [r12+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [rdx+128] mov QWORD PTR [r12+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [rdx+136] mov QWORD PTR [r12+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [rdx+144] mov QWORD PTR [r12+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [rdx+152] mov QWORD PTR [r12+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [rdx+160] mov QWORD PTR [r12+152], r9 adc r10, 
QWORD PTR [r14+160] mov rax, QWORD PTR [rdx+168] mov QWORD PTR [r12+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [rdx+176] mov QWORD PTR [r12+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [rdx+184] mov QWORD PTR [r12+176], r9 adc r10, QWORD PTR [r14+184] mov QWORD PTR [r12+184], r10 adc r15, 0 mov QWORD PTR [rsp+1176], r15 lea r13, QWORD PTR [rsp+960] lea r14, QWORD PTR [r8+192] ; Add mov rax, QWORD PTR [r8] xor rdi, rdi add rax, QWORD PTR [r14] mov r9, QWORD PTR [r8+8] mov QWORD PTR [r13], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [r8+16] mov QWORD PTR [r13+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [r8+24] mov QWORD PTR [r13+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [r8+32] mov QWORD PTR [r13+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [r8+40] mov QWORD PTR [r13+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [r8+48] mov QWORD PTR [r13+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [r8+56] mov QWORD PTR [r13+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [r8+64] mov QWORD PTR [r13+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [r8+72] mov QWORD PTR [r13+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [r8+80] mov QWORD PTR [r13+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [r8+88] mov QWORD PTR [r13+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [r8+96] mov QWORD PTR [r13+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [r8+104] mov QWORD PTR [r13+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [r8+112] mov QWORD PTR [r13+104], r9 adc r10, QWORD PTR [r14+112] mov rax, QWORD PTR [r8+120] mov QWORD PTR [r13+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [r8+128] mov QWORD PTR [r13+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [r8+136] mov QWORD PTR [r13+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [r8+144] mov QWORD PTR [r13+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [r8+152] mov QWORD 
PTR [r13+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [r8+160] mov QWORD PTR [r13+152], r9 adc r10, QWORD PTR [r14+160] mov rax, QWORD PTR [r8+168] mov QWORD PTR [r13+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [r8+176] mov QWORD PTR [r13+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [r8+184] mov QWORD PTR [r13+176], r9 adc r10, QWORD PTR [r14+184] mov QWORD PTR [r13+184], r10 adc rdi, 0 mov QWORD PTR [rsp+1184], rdi mov r8, r13 mov rdx, r12 mov rcx, rsp call sp_3072_mul_avx2_24 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] lea rcx, QWORD PTR [rsp+384] add r8, 192 add rdx, 192 call sp_3072_mul_avx2_24 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] mov rcx, QWORD PTR [rsp+1152] call sp_3072_mul_avx2_24 IFDEF _WIN64 mov r8, QWORD PTR [rsp+1168] mov rdx, QWORD PTR [rsp+1160] mov rcx, QWORD PTR [rsp+1152] ENDIF mov r15, QWORD PTR [rsp+1176] mov rdi, QWORD PTR [rsp+1184] mov rsi, QWORD PTR [rsp+1152] mov r11, r15 lea r12, QWORD PTR [rsp+768] lea r13, QWORD PTR [rsp+960] and r11, rdi neg r15 neg rdi add rsi, 384 mov rax, QWORD PTR [r12] mov r9, QWORD PTR [r13] pext rax, rax, rdi pext r9, r9, r15 add rax, r9 mov r9, QWORD PTR [r12+8] mov r10, QWORD PTR [r13+8] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi], rax adc r9, r10 mov r10, QWORD PTR [r12+16] mov rax, QWORD PTR [r13+16] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+8], r9 adc r10, rax mov rax, QWORD PTR [r12+24] mov r9, QWORD PTR [r13+24] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+16], r10 adc rax, r9 mov r9, QWORD PTR [r12+32] mov r10, QWORD PTR [r13+32] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+24], rax adc r9, r10 mov r10, QWORD PTR [r12+40] mov rax, QWORD PTR [r13+40] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+32], r9 adc r10, rax mov rax, QWORD PTR [r12+48] mov r9, QWORD PTR [r13+48] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+40], r10 adc rax, r9 mov r9, QWORD PTR [r12+56] mov 
r10, QWORD PTR [r13+56] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+48], rax adc r9, r10 mov r10, QWORD PTR [r12+64] mov rax, QWORD PTR [r13+64] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+56], r9 adc r10, rax mov rax, QWORD PTR [r12+72] mov r9, QWORD PTR [r13+72] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+64], r10 adc rax, r9 mov r9, QWORD PTR [r12+80] mov r10, QWORD PTR [r13+80] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+72], rax adc r9, r10 mov r10, QWORD PTR [r12+88] mov rax, QWORD PTR [r13+88] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+80], r9 adc r10, rax mov rax, QWORD PTR [r12+96] mov r9, QWORD PTR [r13+96] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+88], r10 adc rax, r9 mov r9, QWORD PTR [r12+104] mov r10, QWORD PTR [r13+104] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+96], rax adc r9, r10 mov r10, QWORD PTR [r12+112] mov rax, QWORD PTR [r13+112] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+104], r9 adc r10, rax mov rax, QWORD PTR [r12+120] mov r9, QWORD PTR [r13+120] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+112], r10 adc rax, r9 mov r9, QWORD PTR [r12+128] mov r10, QWORD PTR [r13+128] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+120], rax adc r9, r10 mov r10, QWORD PTR [r12+136] mov rax, QWORD PTR [r13+136] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+128], r9 adc r10, rax mov rax, QWORD PTR [r12+144] mov r9, QWORD PTR [r13+144] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+136], r10 adc rax, r9 mov r9, QWORD PTR [r12+152] mov r10, QWORD PTR [r13+152] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+144], rax adc r9, r10 mov r10, QWORD PTR [r12+160] mov rax, QWORD PTR [r13+160] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+152], r9 adc r10, rax mov rax, QWORD PTR [r12+168] mov r9, QWORD PTR [r13+168] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+160], r10 adc rax, r9 mov r9, QWORD PTR 
[r12+176] mov r10, QWORD PTR [r13+176] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+168], rax adc r9, r10 mov r10, QWORD PTR [r12+184] mov rax, QWORD PTR [r13+184] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+176], r9 adc r10, rax mov QWORD PTR [rsi+184], r10 adc r11, 0 lea r13, QWORD PTR [rsp+384] mov r12, rsp mov rax, QWORD PTR [r12] sub rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [r13+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [r13+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [r13+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [r13+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [r13+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [r13+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [r13+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [r13+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [r13+152] mov r10, QWORD PTR 
[r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [r13+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [r13+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [r13+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [r13+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [r13+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [r13+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [r13+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [r13+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [r13+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [r13+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [r13+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [r13+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [r13+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [r13+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [r13+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR [r12+272], r9 sbb r10, QWORD PTR [r13+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], r10 sbb rax, QWORD PTR [r13+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [r13+296] mov r10, QWORD PTR [r12+304] mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [r13+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [r13+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [r13+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [r13+328] mov rax, QWORD PTR [r12+336] mov QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [r13+336] mov r9, QWORD PTR [r12+344] mov QWORD 
PTR [r12+336], rax sbb r9, QWORD PTR [r13+344] mov r10, QWORD PTR [r12+352] mov QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [r13+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [r13+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [r13+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [r13+376] mov QWORD PTR [r12+376], r10 sbb r11, 0 mov rax, QWORD PTR [r12] sub rax, QWORD PTR [rcx] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [rcx+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [rcx+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [rcx+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [rcx+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [rcx+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [rcx+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [rcx+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [rcx+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [rcx+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [rcx+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [rcx+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [rcx+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [rcx+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [rcx+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [rcx+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [rcx+144] mov r9, QWORD 
PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [rcx+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [rcx+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [rcx+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [rcx+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [rcx+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [rcx+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [rcx+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [rcx+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [rcx+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [rcx+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [rcx+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [rcx+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [rcx+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [rcx+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [rcx+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [rcx+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR [r12+272], r9 sbb r10, QWORD PTR [rcx+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], r10 sbb rax, QWORD PTR [rcx+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [rcx+296] mov r10, QWORD PTR [r12+304] mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [rcx+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [rcx+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [rcx+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [rcx+328] mov rax, QWORD PTR [r12+336] mov 
QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [rcx+336] mov r9, QWORD PTR [r12+344] mov QWORD PTR [r12+336], rax sbb r9, QWORD PTR [rcx+344] mov r10, QWORD PTR [r12+352] mov QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [rcx+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [rcx+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [rcx+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [rcx+376] mov QWORD PTR [r12+376], r10 sbb r11, 0 sub rsi, 192 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r12] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r12+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r12+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r12+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r12+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r12+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r12+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r12+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r12+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r12+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r12+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r12+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r12+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r12+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r12+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r12+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r12+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR 
[r12+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r12+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r12+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r12+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r12+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r12+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r12+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r12+192] mov r9, QWORD PTR [rsi+200] mov QWORD PTR [rsi+192], rax adc r9, QWORD PTR [r12+200] mov r10, QWORD PTR [rsi+208] mov QWORD PTR [rsi+200], r9 adc r10, QWORD PTR [r12+208] mov rax, QWORD PTR [rsi+216] mov QWORD PTR [rsi+208], r10 adc rax, QWORD PTR [r12+216] mov r9, QWORD PTR [rsi+224] mov QWORD PTR [rsi+216], rax adc r9, QWORD PTR [r12+224] mov r10, QWORD PTR [rsi+232] mov QWORD PTR [rsi+224], r9 adc r10, QWORD PTR [r12+232] mov rax, QWORD PTR [rsi+240] mov QWORD PTR [rsi+232], r10 adc rax, QWORD PTR [r12+240] mov r9, QWORD PTR [rsi+248] mov QWORD PTR [rsi+240], rax adc r9, QWORD PTR [r12+248] mov r10, QWORD PTR [rsi+256] mov QWORD PTR [rsi+248], r9 adc r10, QWORD PTR [r12+256] mov rax, QWORD PTR [rsi+264] mov QWORD PTR [rsi+256], r10 adc rax, QWORD PTR [r12+264] mov r9, QWORD PTR [rsi+272] mov QWORD PTR [rsi+264], rax adc r9, QWORD PTR [r12+272] mov r10, QWORD PTR [rsi+280] mov QWORD PTR [rsi+272], r9 adc r10, QWORD PTR [r12+280] mov rax, QWORD PTR [rsi+288] mov QWORD PTR [rsi+280], r10 adc rax, QWORD PTR [r12+288] mov r9, QWORD PTR [rsi+296] mov QWORD PTR [rsi+288], rax adc r9, QWORD PTR [r12+296] mov r10, QWORD PTR [rsi+304] mov QWORD PTR [rsi+296], r9 adc r10, QWORD PTR [r12+304] mov rax, QWORD PTR [rsi+312] mov QWORD PTR [rsi+304], r10 adc rax, QWORD PTR [r12+312] mov r9, QWORD PTR [rsi+320] mov QWORD PTR [rsi+312], rax adc r9, QWORD PTR [r12+320] mov r10, 
QWORD PTR [rsi+328] mov QWORD PTR [rsi+320], r9 adc r10, QWORD PTR [r12+328] mov rax, QWORD PTR [rsi+336] mov QWORD PTR [rsi+328], r10 adc rax, QWORD PTR [r12+336] mov r9, QWORD PTR [rsi+344] mov QWORD PTR [rsi+336], rax adc r9, QWORD PTR [r12+344] mov r10, QWORD PTR [rsi+352] mov QWORD PTR [rsi+344], r9 adc r10, QWORD PTR [r12+352] mov rax, QWORD PTR [rsi+360] mov QWORD PTR [rsi+352], r10 adc rax, QWORD PTR [r12+360] mov r9, QWORD PTR [rsi+368] mov QWORD PTR [rsi+360], rax adc r9, QWORD PTR [r12+368] mov r10, QWORD PTR [rsi+376] mov QWORD PTR [rsi+368], r9 adc r10, QWORD PTR [r12+376] mov QWORD PTR [rsi+376], r10 adc r11, 0 mov QWORD PTR [rcx+576], r11 add rsi, 192 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r13] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r13+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r13+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r13+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r13+120] mov r9, QWORD PTR 
[rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r13+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r13+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r13+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r13+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r13+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r13+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r13+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r13+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r13+192] mov QWORD PTR [rsi+192], rax ; Add to zero mov rax, QWORD PTR [r13+200] adc rax, 0 mov r9, QWORD PTR [r13+208] mov QWORD PTR [rsi+200], rax adc r9, 0 mov r10, QWORD PTR [r13+216] mov QWORD PTR [rsi+208], r9 adc r10, 0 mov rax, QWORD PTR [r13+224] mov QWORD PTR [rsi+216], r10 adc rax, 0 mov r9, QWORD PTR [r13+232] mov QWORD PTR [rsi+224], rax adc r9, 0 mov r10, QWORD PTR [r13+240] mov QWORD PTR [rsi+232], r9 adc r10, 0 mov rax, QWORD PTR [r13+248] mov QWORD PTR [rsi+240], r10 adc rax, 0 mov r9, QWORD PTR [r13+256] mov QWORD PTR [rsi+248], rax adc r9, 0 mov r10, QWORD PTR [r13+264] mov QWORD PTR [rsi+256], r9 adc r10, 0 mov rax, QWORD PTR [r13+272] mov QWORD PTR [rsi+264], r10 adc rax, 0 mov r9, QWORD PTR [r13+280] mov QWORD PTR [rsi+272], rax adc r9, 0 mov r10, QWORD PTR [r13+288] mov QWORD PTR [rsi+280], r9 adc r10, 0 mov rax, QWORD PTR [r13+296] mov QWORD PTR [rsi+288], r10 adc rax, 0 mov r9, QWORD PTR [r13+304] mov QWORD PTR [rsi+296], rax adc r9, 0 mov r10, QWORD PTR [r13+312] mov QWORD PTR [rsi+304], r9 adc r10, 0 mov rax, QWORD PTR [r13+320] mov QWORD PTR [rsi+312], r10 adc rax, 0 mov r9, QWORD PTR [r13+328] mov QWORD PTR [rsi+320], rax adc r9, 0 mov r10, QWORD PTR [r13+336] mov QWORD PTR [rsi+328], r9 adc r10, 0 mov 
rax, QWORD PTR [r13+344] mov QWORD PTR [rsi+336], r10 adc rax, 0 mov r9, QWORD PTR [r13+352] mov QWORD PTR [rsi+344], rax adc r9, 0 mov r10, QWORD PTR [r13+360] mov QWORD PTR [rsi+352], r9 adc r10, 0 mov rax, QWORD PTR [r13+368] mov QWORD PTR [rsi+360], r10 adc rax, 0 mov r9, QWORD PTR [r13+376] mov QWORD PTR [rsi+368], rax adc r9, 0 mov QWORD PTR [rsi+376], r9 add rsp, 1192 pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_3072_mul_avx2_48 ENDP _text ENDS ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * The result is produced one 64-bit word (column) at a time: every partial
;  * product A[i] * A[j] with i + j equal to the column index is summed into a
;  * three-word accumulator before the column's word is stored.  Products with
;  * i != j are counted twice, products with i == j once.
;  *
;  * r  (rcx)  Result.  24 words are written: [rcx] .. [rcx+184].
;  * a  (rdx)  Operand. 12 words are read:    [rdx] .. [rdx+88].
;  *
;  * Result words 0-11 are staged in a 96-byte stack buffer and copied to r
;  * only after the last read of a; words 12-23 are stored to r directly.
;  * Nothing at or below [rcx+88] is written before that copy, so r == a works.
;  *
;  * Register roles:
;  *   r8              a
;  *   r9, r10, r11    rotating column accumulator (low, middle, high word)
;  *   r14:r13:r12     sum of a column's cross products before it is doubled
;  *   rax, rdx        mul operand / product
;  * r12, r13 and r14 are pushed on entry and popped on exit.
;  *
;  * NOTE(review): rsp is adjusted and nonvolatile registers are pushed under a
;  * plain PROC (no FRAME / unwind directives) - confirm that is intended.
;  */

; (rdx:rax) = A[loLimb] * A[hiLimb]
SP3072_SQR12_MUL MACRO loLimb, hiLimb
        mov     rax, QWORD PTR [r8+8*hiLimb]
        mul     QWORD PTR [r8+8*loLimb]
ENDM

; (rdx:rax) = A[limb] * A[limb]
SP3072_SQR12_SQR MACRO limb
        mov     rax, QWORD PTR [r8+8*limb]
        mul     rax
ENDM

; (wHi:wMid:wLo) += (rdx:rax)
SP3072_SQR12_ADD1 MACRO wLo, wMid, wHi
        add     wLo, rax
        adc     wMid, rdx
        adc     wHi, 0
ENDM

; (wHi:wMid:wLo) += 2 * (rdx:rax), done as two single additions
SP3072_SQR12_ADD2 MACRO wLo, wMid, wHi
        SP3072_SQR12_ADD1 wLo, wMid, wHi
        SP3072_SQR12_ADD1 wLo, wMid, wHi
ENDM

; Open a column: (r14:r13:r12) = A[loLimb] * A[hiLimb], and clear wHi, the
; register that becomes the top word of this column's accumulator.
SP3072_SQR12_FIRST MACRO loLimb, hiLimb, wHi
        SP3072_SQR12_MUL loLimb, hiLimb
        xor     wHi, wHi
        xor     r14, r14
        mov     r12, rax
        mov     r13, rdx
ENDM

; (r14:r13:r12) += A[loLimb] * A[hiLimb]
SP3072_SQR12_NEXT MACRO loLimb, hiLimb
        SP3072_SQR12_MUL loLimb, hiLimb
        SP3072_SQR12_ADD1 r12, r13, r14
ENDM

; (r14:r13:r12) *= 2
SP3072_SQR12_DBL MACRO
        add     r12, r12
        adc     r13, r13
        adc     r14, r14
ENDM

; (wHi:wMid:wLo) += (r14:r13:r12)
SP3072_SQR12_FOLD MACRO wLo, wMid, wHi
        add     wLo, r12
        adc     wMid, r13
        adc     wHi, r14
ENDM

; Copy four staged words at byteOff from the stack buffer to r
SP3072_SQR12_COPY4 MACRO byteOff
        mov     rax, QWORD PTR [rsp+byteOff]
        mov     rdx, QWORD PTR [rsp+byteOff+8]
        mov     r12, QWORD PTR [rsp+byteOff+16]
        mov     r13, QWORD PTR [rsp+byteOff+24]
        mov     QWORD PTR [rcx+byteOff], rax
        mov     QWORD PTR [rcx+byteOff+8], rdx
        mov     QWORD PTR [rcx+byteOff+16], r12
        mov     QWORD PTR [rcx+byteOff+24], r13
ENDM

_text SEGMENT READONLY PARA
sp_3072_sqr_12 PROC
        push    r12
        push    r13
        push    r14
        mov     r8, rdx                         ; rdx is needed by mul
        sub     rsp, 96                         ; staging for result words 0-11

        ; ---- word 0: A[0]^2
        SP3072_SQR12_SQR 0
        xor     r11, r11
        mov     QWORD PTR [rsp], rax
        mov     r10, rdx

        ; ---- word 1: 2*A[0]*A[1]
        SP3072_SQR12_MUL 0, 1
        xor     r9, r9
        SP3072_SQR12_ADD2 r10, r11, r9
        mov     QWORD PTR [rsp+8], r10

        ; ---- word 2: 2*A[0]*A[2] + A[1]^2
        SP3072_SQR12_MUL 0, 2
        xor     r10, r10
        SP3072_SQR12_ADD2 r11, r9, r10
        SP3072_SQR12_SQR 1
        SP3072_SQR12_ADD1 r11, r9, r10
        mov     QWORD PTR [rsp+16], r11

        ; ---- word 3: 2*(A[0]*A[3] + A[1]*A[2])
        SP3072_SQR12_MUL 0, 3
        xor     r11, r11
        SP3072_SQR12_ADD2 r9, r10, r11
        SP3072_SQR12_MUL 1, 2
        SP3072_SQR12_ADD2 r9, r10, r11
        mov     QWORD PTR [rsp+24], r9

        ; ---- word 4: 2*(A[0]*A[4] + A[1]*A[3]) + A[2]^2
        SP3072_SQR12_MUL 0, 4
        xor     r9, r9
        SP3072_SQR12_ADD2 r10, r11, r9
        SP3072_SQR12_MUL 1, 3
        SP3072_SQR12_ADD2 r10, r11, r9
        SP3072_SQR12_SQR 2
        SP3072_SQR12_ADD1 r10, r11, r9
        mov     QWORD PTR [rsp+32], r10

        ; From here to word 17 the cross products of a column are summed in
        ; r14:r13:r12 and doubled once, instead of being added twice each.

        ; ---- word 5
        SP3072_SQR12_FIRST 0, 5, r10
        SP3072_SQR12_NEXT 1, 4
        SP3072_SQR12_NEXT 2, 3
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r11, r9, r10
        mov     QWORD PTR [rsp+40], r11

        ; ---- word 6
        SP3072_SQR12_FIRST 0, 6, r11
        SP3072_SQR12_NEXT 1, 5
        SP3072_SQR12_NEXT 2, 4
        SP3072_SQR12_SQR 3
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r9, r10, r11
        mov     QWORD PTR [rsp+48], r9

        ; ---- word 7
        SP3072_SQR12_FIRST 0, 7, r9
        SP3072_SQR12_NEXT 1, 6
        SP3072_SQR12_NEXT 2, 5
        SP3072_SQR12_NEXT 3, 4
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r10, r11, r9
        mov     QWORD PTR [rsp+56], r10

        ; ---- word 8
        SP3072_SQR12_FIRST 0, 8, r10
        SP3072_SQR12_NEXT 1, 7
        SP3072_SQR12_NEXT 2, 6
        SP3072_SQR12_NEXT 3, 5
        SP3072_SQR12_SQR 4
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r11, r9, r10
        mov     QWORD PTR [rsp+64], r11

        ; ---- word 9
        SP3072_SQR12_FIRST 0, 9, r11
        SP3072_SQR12_NEXT 1, 8
        SP3072_SQR12_NEXT 2, 7
        SP3072_SQR12_NEXT 3, 6
        SP3072_SQR12_NEXT 4, 5
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r9, r10, r11
        mov     QWORD PTR [rsp+72], r9

        ; ---- word 10
        SP3072_SQR12_FIRST 0, 10, r9
        SP3072_SQR12_NEXT 1, 9
        SP3072_SQR12_NEXT 2, 8
        SP3072_SQR12_NEXT 3, 7
        SP3072_SQR12_NEXT 4, 6
        SP3072_SQR12_SQR 5
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r10, r11, r9
        mov     QWORD PTR [rsp+80], r10

        ; ---- word 11
        SP3072_SQR12_FIRST 0, 11, r10
        SP3072_SQR12_NEXT 1, 10
        SP3072_SQR12_NEXT 2, 9
        SP3072_SQR12_NEXT 3, 8
        SP3072_SQR12_NEXT 4, 7
        SP3072_SQR12_NEXT 5, 6
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r11, r9, r10
        mov     QWORD PTR [rsp+88], r11

        ; Words 12 and up lie beyond the 12 words of a, so they go straight to r.

        ; ---- word 12
        SP3072_SQR12_FIRST 1, 11, r11
        SP3072_SQR12_NEXT 2, 10
        SP3072_SQR12_NEXT 3, 9
        SP3072_SQR12_NEXT 4, 8
        SP3072_SQR12_NEXT 5, 7
        SP3072_SQR12_SQR 6
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r9, r10, r11
        mov     QWORD PTR [rcx+96], r9

        ; ---- word 13
        SP3072_SQR12_FIRST 2, 11, r9
        SP3072_SQR12_NEXT 3, 10
        SP3072_SQR12_NEXT 4, 9
        SP3072_SQR12_NEXT 5, 8
        SP3072_SQR12_NEXT 6, 7
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r10, r11, r9
        mov     QWORD PTR [rcx+104], r10

        ; ---- word 14
        SP3072_SQR12_FIRST 3, 11, r10
        SP3072_SQR12_NEXT 4, 10
        SP3072_SQR12_NEXT 5, 9
        SP3072_SQR12_NEXT 6, 8
        SP3072_SQR12_SQR 7
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r11, r9, r10
        mov     QWORD PTR [rcx+112], r11

        ; ---- word 15
        SP3072_SQR12_FIRST 4, 11, r11
        SP3072_SQR12_NEXT 5, 10
        SP3072_SQR12_NEXT 6, 9
        SP3072_SQR12_NEXT 7, 8
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r9, r10, r11
        mov     QWORD PTR [rcx+120], r9

        ; ---- word 16
        SP3072_SQR12_FIRST 5, 11, r9
        SP3072_SQR12_NEXT 6, 10
        SP3072_SQR12_NEXT 7, 9
        SP3072_SQR12_SQR 8
        SP3072_SQR12_DBL
        SP3072_SQR12_ADD1 r12, r13, r14
        SP3072_SQR12_FOLD r10, r11, r9
        mov     QWORD PTR [rcx+128], r10

        ; ---- word 17
        SP3072_SQR12_FIRST 6, 11, r10
        SP3072_SQR12_NEXT 7, 10
        SP3072_SQR12_NEXT 8, 9
        SP3072_SQR12_DBL
        SP3072_SQR12_FOLD r11, r9, r10
        mov     QWORD PTR [rcx+136], r11

        ; ---- word 18: 2*(A[7]*A[11] + A[8]*A[10]) + A[9]^2
        SP3072_SQR12_MUL 7, 11
        xor     r11, r11
        SP3072_SQR12_ADD2 r9, r10, r11
        SP3072_SQR12_MUL 8, 10
        SP3072_SQR12_ADD2 r9, r10, r11
        SP3072_SQR12_SQR 9
        SP3072_SQR12_ADD1 r9, r10, r11
        mov     QWORD PTR [rcx+144], r9

        ; ---- word 19: 2*(A[8]*A[11] + A[9]*A[10])
        SP3072_SQR12_MUL 8, 11
        xor     r9, r9
        SP3072_SQR12_ADD2 r10, r11, r9
        SP3072_SQR12_MUL 9, 10
        SP3072_SQR12_ADD2 r10, r11, r9
        mov     QWORD PTR [rcx+152], r10

        ; ---- word 20: 2*A[9]*A[11] + A[10]^2
        SP3072_SQR12_MUL 9, 11
        xor     r10, r10
        SP3072_SQR12_ADD2 r11, r9, r10
        SP3072_SQR12_SQR 10
        SP3072_SQR12_ADD1 r11, r9, r10
        mov     QWORD PTR [rcx+160], r11

        ; ---- word 21: 2*A[10]*A[11]
        SP3072_SQR12_MUL 10, 11
        xor     r11, r11
        SP3072_SQR12_ADD2 r9, r10, r11
        mov     QWORD PTR [rcx+168], r9

        ; ---- words 22, 23: A[11]^2 plus what is left in the accumulator
        SP3072_SQR12_SQR 11
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+176], r10
        mov     QWORD PTR [rcx+184], r11

        ; ---- every read of a is done: move the staged words 0-11 into r
        SP3072_SQR12_COPY4 0
        SP3072_SQR12_COPY4 32
        SP3072_SQR12_COPY4 64

        add     rsp, 96
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_sqr_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. 
; */ _text SEGMENT READONLY PARA sp_3072_sqr_avx2_12 PROC push rbp push r12 push r13 push r14 push r15 push rdi push rsi push rbx mov r8, rcx mov r9, rdx sub rsp, 96 cmp r9, r8 mov rbp, rsp cmovne rbp, r8 add r8, 96 xor r12, r12 ; Diagonal 1 ; Zero into %r9 ; A[1] x A[0] mov rdx, QWORD PTR [r9] mulx r11, r10, QWORD PTR [r9+8] mov QWORD PTR [rbp+8], r10 ; Zero into %r8 ; A[2] x A[0] mulx r10, rax, QWORD PTR [r9+16] adcx r11, rax adox r10, r12 mov QWORD PTR [rbp+16], r11 ; Zero into %r9 ; A[3] x A[0] mulx r11, rax, QWORD PTR [r9+24] adcx r10, rax adox r11, r12 mov QWORD PTR [rbp+24], r10 ; Zero into %r8 ; A[4] x A[0] mulx r10, rax, QWORD PTR [r9+32] adcx r11, rax adox r10, r12 mov QWORD PTR [rbp+32], r11 ; Zero into %r9 ; A[5] x A[0] mulx r11, rax, QWORD PTR [r9+40] adcx r10, rax adox r11, r12 mov QWORD PTR [rbp+40], r10 ; No load %r12 - %r8 ; A[6] x A[0] mulx r14, rax, QWORD PTR [r9+48] adcx r11, rax adox r14, r12 mov QWORD PTR [rbp+48], r11 ; No load %r13 - %r9 ; A[7] x A[0] mulx r15, rax, QWORD PTR [r9+56] adcx r14, rax adox r15, r12 ; No store %r12 - %r8 ; No load %r14 - %r8 ; A[8] x A[0] mulx rdi, rax, QWORD PTR [r9+64] adcx r15, rax adox rdi, r12 ; No store %r13 - %r9 ; No load %r15 - %r9 ; A[9] x A[0] mulx rsi, rax, QWORD PTR [r9+72] adcx rdi, rax adox rsi, r12 ; No store %r14 - %r8 ; No load %rbx - %r8 ; A[10] x A[0] mulx rbx, rax, QWORD PTR [r9+80] adcx rsi, rax adox rbx, r12 ; No store %r15 - %r9 ; Zero into %r9 ; A[11] x A[0] mulx r11, rax, QWORD PTR [r9+88] adcx rbx, rax adox r11, r12 ; No store %rbx - %r8 ; Carry adcx r11, r12 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR [r8], r11 ; Diagonal 2 mov r11, QWORD PTR [rbp+24] mov r10, QWORD PTR [rbp+32] ; A[2] x A[1] mov rdx, QWORD PTR [r9+8] mulx rcx, rax, QWORD PTR [r9+16] adcx r11, rax adox r10, rcx mov QWORD PTR [rbp+24], r11 mov r11, QWORD PTR [rbp+40] ; A[3] x A[1] mulx rcx, rax, QWORD PTR [r9+24] adcx r10, rax adox r11, rcx mov QWORD PTR [rbp+32], r10 mov r10, QWORD PTR [rbp+48] ; A[4] x A[1] 
mulx rcx, rax, QWORD PTR [r9+32] adcx r11, rax adox r10, rcx mov QWORD PTR [rbp+40], r11 ; No load %r12 - %r9 ; A[5] x A[1] mulx rcx, rax, QWORD PTR [r9+40] adcx r10, rax adox r14, rcx mov QWORD PTR [rbp+48], r10 ; No load %r13 - %r8 ; A[6] x A[1] mulx rcx, rax, QWORD PTR [r9+48] adcx r14, rax adox r15, rcx ; No store %r12 - %r9 ; No load %r14 - %r9 ; A[7] x A[1] mulx rcx, rax, QWORD PTR [r9+56] adcx r15, rax adox rdi, rcx ; No store %r13 - %r8 ; No load %r15 - %r8 ; A[8] x A[1] mulx rcx, rax, QWORD PTR [r9+64] adcx rdi, rax adox rsi, rcx ; No store %r14 - %r9 ; No load %rbx - %r9 ; A[9] x A[1] mulx rcx, rax, QWORD PTR [r9+72] adcx rsi, rax adox rbx, rcx ; No store %r15 - %r8 mov r10, QWORD PTR [r8] ; A[10] x A[1] mulx rcx, rax, QWORD PTR [r9+80] adcx rbx, rax adox r10, rcx ; No store %rbx - %r9 ; Zero into %r9 ; A[11] x A[1] mulx r11, rax, QWORD PTR [r9+88] adcx r10, rax adox r11, r12 mov QWORD PTR [r8], r10 ; Zero into %r8 ; A[11] x A[2] mov rdx, QWORD PTR [r9+16] mulx r10, rax, QWORD PTR [r9+88] adcx r11, rax adox r10, r12 mov QWORD PTR [r8+8], r11 ; Carry adcx r10, r13 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR [r8+16], r10 ; Diagonal 3 mov r10, QWORD PTR [rbp+40] mov r11, QWORD PTR [rbp+48] ; A[3] x A[2] mulx rcx, rax, QWORD PTR [r9+24] adcx r10, rax adox r11, rcx mov QWORD PTR [rbp+40], r10 ; No load %r12 - %r8 ; A[4] x A[2] mulx rcx, rax, QWORD PTR [r9+32] adcx r11, rax adox r14, rcx mov QWORD PTR [rbp+48], r11 ; No load %r13 - %r9 ; A[5] x A[2] mulx rcx, rax, QWORD PTR [r9+40] adcx r14, rax adox r15, rcx ; No store %r12 - %r8 ; No load %r14 - %r8 ; A[6] x A[2] mulx rcx, rax, QWORD PTR [r9+48] adcx r15, rax adox rdi, rcx ; No store %r13 - %r9 ; No load %r15 - %r9 ; A[7] x A[2] mulx rcx, rax, QWORD PTR [r9+56] adcx rdi, rax adox rsi, rcx ; No store %r14 - %r8 ; No load %rbx - %r8 ; A[8] x A[2] mulx rcx, rax, QWORD PTR [r9+64] adcx rsi, rax adox rbx, rcx ; No store %r15 - %r9 mov r11, QWORD PTR [r8] ; A[9] x A[2] mulx rcx, rax, QWORD PTR [r9+72] 
adcx rbx, rax adox r11, rcx ; No store %rbx - %r8 mov r10, QWORD PTR [r8+8] ; A[10] x A[2] mulx rcx, rax, QWORD PTR [r9+80] adcx r11, rax adox r10, rcx mov QWORD PTR [r8], r11 mov r11, QWORD PTR [r8+16] ; A[10] x A[3] mov rdx, QWORD PTR [r9+24] mulx rcx, rax, QWORD PTR [r9+80] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+8], r10 ; Zero into %r8 ; A[10] x A[4] mov rdx, QWORD PTR [r9+32] mulx r10, rax, QWORD PTR [r9+80] adcx r11, rax adox r10, r12 mov QWORD PTR [r8+16], r11 ; Zero into %r9 ; A[10] x A[5] mov rdx, QWORD PTR [r9+40] mulx r11, rax, QWORD PTR [r9+80] adcx r10, rax adox r11, r12 mov QWORD PTR [r8+24], r10 ; Carry adcx r11, r13 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR [r8+32], r11 ; Diagonal 4 ; No load %r13 - %r8 ; A[4] x A[3] mov rdx, QWORD PTR [r9+24] mulx rcx, rax, QWORD PTR [r9+32] adcx r14, rax adox r15, rcx ; No store %r12 - %r9 ; No load %r14 - %r9 ; A[5] x A[3] mulx rcx, rax, QWORD PTR [r9+40] adcx r15, rax adox rdi, rcx ; No store %r13 - %r8 ; No load %r15 - %r8 ; A[6] x A[3] mulx rcx, rax, QWORD PTR [r9+48] adcx rdi, rax adox rsi, rcx ; No store %r14 - %r9 ; No load %rbx - %r9 ; A[7] x A[3] mulx rcx, rax, QWORD PTR [r9+56] adcx rsi, rax adox rbx, rcx ; No store %r15 - %r8 mov r10, QWORD PTR [r8] ; A[8] x A[3] mulx rcx, rax, QWORD PTR [r9+64] adcx rbx, rax adox r10, rcx ; No store %rbx - %r9 mov r11, QWORD PTR [r8+8] ; A[9] x A[3] mulx rcx, rax, QWORD PTR [r9+72] adcx r10, rax adox r11, rcx mov QWORD PTR [r8], r10 mov r10, QWORD PTR [r8+16] ; A[9] x A[4] mov rdx, QWORD PTR [r9+32] mulx rcx, rax, QWORD PTR [r9+72] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+8], r11 mov r11, QWORD PTR [r8+24] ; A[9] x A[5] mov rdx, QWORD PTR [r9+40] mulx rcx, rax, QWORD PTR [r9+72] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+16], r10 mov r10, QWORD PTR [r8+32] ; A[9] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, QWORD PTR [r9+72] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+24], r11 ; Zero into %r9 ; A[9] x A[7] mov rdx, QWORD PTR [r9+56] 
mulx r11, rax, QWORD PTR [r9+72] adcx r10, rax adox r11, r12 mov QWORD PTR [r8+32], r10 ; Zero into %r8 ; A[9] x A[8] mov rdx, QWORD PTR [r9+64] mulx r10, rax, QWORD PTR [r9+72] adcx r11, rax adox r10, r12 mov QWORD PTR [r8+40], r11 ; Carry adcx r10, r13 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR [r8+48], r10 ; Diagonal 5 ; No load %r15 - %r9 ; A[5] x A[4] mov rdx, QWORD PTR [r9+32] mulx rcx, rax, QWORD PTR [r9+40] adcx rdi, rax adox rsi, rcx ; No store %r14 - %r8 ; No load %rbx - %r8 ; A[6] x A[4] mulx rcx, rax, QWORD PTR [r9+48] adcx rsi, rax adox rbx, rcx ; No store %r15 - %r9 mov r11, QWORD PTR [r8] ; A[7] x A[4] mulx rcx, rax, QWORD PTR [r9+56] adcx rbx, rax adox r11, rcx ; No store %rbx - %r8 mov r10, QWORD PTR [r8+8] ; A[8] x A[4] mulx rcx, rax, QWORD PTR [r9+64] adcx r11, rax adox r10, rcx mov QWORD PTR [r8], r11 mov r11, QWORD PTR [r8+16] ; A[8] x A[5] mov rdx, QWORD PTR [r9+40] mulx rcx, rax, QWORD PTR [r9+64] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+8], r10 mov r10, QWORD PTR [r8+24] ; A[8] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, QWORD PTR [r9+64] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+16], r11 mov r11, QWORD PTR [r8+32] ; A[8] x A[7] mov rdx, QWORD PTR [r9+56] mulx rcx, rax, QWORD PTR [r9+64] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+24], r10 mov r10, QWORD PTR [r8+40] ; A[10] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, QWORD PTR [r9+80] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+32], r11 mov r11, QWORD PTR [r8+48] ; A[10] x A[7] mov rdx, QWORD PTR [r9+56] mulx rcx, rax, QWORD PTR [r9+80] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+40], r10 ; Zero into %r8 ; A[10] x A[8] mov rdx, QWORD PTR [r9+64] mulx r10, rax, QWORD PTR [r9+80] adcx r11, rax adox r10, r12 mov QWORD PTR [r8+48], r11 ; Zero into %r9 ; A[10] x A[9] mov rdx, QWORD PTR [r9+72] mulx r11, rax, QWORD PTR [r9+80] adcx r10, rax adox r11, r12 mov QWORD PTR [r8+56], r10 ; Carry adcx r11, r13 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR 
[r8+64], r11 ; Diagonal 6 mov r10, QWORD PTR [r8] ; A[6] x A[5] mov rdx, QWORD PTR [r9+40] mulx rcx, rax, QWORD PTR [r9+48] adcx rbx, rax adox r10, rcx ; No store %rbx - %r9 mov r11, QWORD PTR [r8+8] ; A[7] x A[5] mulx rcx, rax, QWORD PTR [r9+56] adcx r10, rax adox r11, rcx mov QWORD PTR [r8], r10 mov r10, QWORD PTR [r8+16] ; A[7] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, QWORD PTR [r9+56] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+8], r11 mov r11, QWORD PTR [r8+24] ; A[11] x A[3] mov rdx, QWORD PTR [r9+24] mulx rcx, rax, QWORD PTR [r9+88] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+16], r10 mov r10, QWORD PTR [r8+32] ; A[11] x A[4] mov rdx, QWORD PTR [r9+32] mulx rcx, rax, QWORD PTR [r9+88] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+24], r11 mov r11, QWORD PTR [r8+40] ; A[11] x A[5] mov rdx, QWORD PTR [r9+40] mulx rcx, rax, QWORD PTR [r9+88] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+32], r10 mov r10, QWORD PTR [r8+48] ; A[11] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, QWORD PTR [r9+88] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+40], r11 mov r11, QWORD PTR [r8+56] ; A[11] x A[7] mov rdx, QWORD PTR [r9+56] mulx rcx, rax, QWORD PTR [r9+88] adcx r10, rax adox r11, rcx mov QWORD PTR [r8+48], r10 mov r10, QWORD PTR [r8+64] ; A[11] x A[8] mov rdx, QWORD PTR [r9+64] mulx rcx, rax, QWORD PTR [r9+88] adcx r11, rax adox r10, rcx mov QWORD PTR [r8+56], r11 ; Zero into %r9 ; A[11] x A[9] mov rdx, QWORD PTR [r9+72] mulx r11, rax, QWORD PTR [r9+88] adcx r10, rax adox r11, r12 mov QWORD PTR [r8+64], r10 ; Zero into %r8 ; A[11] x A[10] mov rdx, QWORD PTR [r9+80] mulx r10, rax, QWORD PTR [r9+88] adcx r11, rax adox r10, r12 mov QWORD PTR [r8+72], r11 ; Carry adcx r10, r13 mov r13, r12 adcx r13, r12 adox r13, r12 mov QWORD PTR [r8+80], r10 mov QWORD PTR [r8+88], r13 ; Double and Add in A[i] x A[i] mov r11, QWORD PTR [rbp+8] ; A[0] x A[0] mov rdx, QWORD PTR [r9] mulx rcx, rax, rdx mov QWORD PTR [rbp], rax adox r11, r11 adcx r11, rcx mov QWORD PTR [rbp+8], 
r11 mov r10, QWORD PTR [rbp+16] mov r11, QWORD PTR [rbp+24] ; A[1] x A[1] mov rdx, QWORD PTR [r9+8] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [rbp+16], r10 mov QWORD PTR [rbp+24], r11 mov r10, QWORD PTR [rbp+32] mov r11, QWORD PTR [rbp+40] ; A[2] x A[2] mov rdx, QWORD PTR [r9+16] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [rbp+32], r10 mov QWORD PTR [rbp+40], r11 mov r10, QWORD PTR [rbp+48] ; A[3] x A[3] mov rdx, QWORD PTR [r9+24] mulx rcx, rax, rdx adox r10, r10 adox r14, r14 adcx r10, rax adcx r14, rcx mov QWORD PTR [rbp+48], r10 ; A[4] x A[4] mov rdx, QWORD PTR [r9+32] mulx rcx, rax, rdx adox r15, r15 adox rdi, rdi adcx r15, rax adcx rdi, rcx ; A[5] x A[5] mov rdx, QWORD PTR [r9+40] mulx rcx, rax, rdx adox rsi, rsi adox rbx, rbx adcx rsi, rax adcx rbx, rcx mov r10, QWORD PTR [r8] mov r11, QWORD PTR [r8+8] ; A[6] x A[6] mov rdx, QWORD PTR [r9+48] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [r8], r10 mov QWORD PTR [r8+8], r11 mov r10, QWORD PTR [r8+16] mov r11, QWORD PTR [r8+24] ; A[7] x A[7] mov rdx, QWORD PTR [r9+56] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [r8+16], r10 mov QWORD PTR [r8+24], r11 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[8] x A[8] mov rdx, QWORD PTR [r9+64] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [r8+32], r10 mov QWORD PTR [r8+40], r11 mov r10, QWORD PTR [r8+48] mov r11, QWORD PTR [r8+56] ; A[9] x A[9] mov rdx, QWORD PTR [r9+72] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [r8+48], r10 mov QWORD PTR [r8+56], r11 mov r10, QWORD PTR [r8+64] mov r11, QWORD PTR [r8+72] ; A[10] x A[10] mov rdx, QWORD PTR [r9+80] mulx rcx, rax, rdx adox r10, r10 adox r11, r11 adcx r10, rax adcx r11, rcx mov QWORD PTR [r8+64], r10 mov QWORD PTR [r8+72], r11 mov r10, QWORD PTR 
[r8+80]
        mov     r11, QWORD PTR [r8+88]
        ; A[11] x A[11]
        mov     rdx, QWORD PTR [r9+88]
        mulx    rcx, rax, rdx
        adox    r10, r10
        adox    r11, r11
        adcx    r10, rax
        adcx    r11, rcx
        mov     QWORD PTR [r8+80], r10
        mov     QWORD PTR [r8+88], r11
        mov     QWORD PTR [r8+-40], r14
        mov     QWORD PTR [r8+-32], r15
        mov     QWORD PTR [r8+-24], rdi
        mov     QWORD PTR [r8+-16], rsi
        mov     QWORD PTR [r8+-8], rbx
        sub     r8, 96
        cmp     r9, r8
        jne     L_end_3072_sqr_avx2_12
        vmovdqu xmm0, OWORD PTR [rbp]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+16]
        vmovups OWORD PTR [r8+16], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+32]
        vmovups OWORD PTR [r8+32], xmm0
        mov     rax, QWORD PTR [rbp+48]
        mov     QWORD PTR [r8+48], rax
L_end_3072_sqr_avx2_12:
        add     rsp, 96
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
sp_3072_sqr_avx2_12 ENDP
_text ENDS
ENDIF

; /* Building blocks shared by sp_3072_sqr_24 and sp_3072_sqr_avx2_24.
;  *
;  * Everything below is an assembly-time macro or constant. Each expansion is
;  * straight-line code: the REPEAT blocks are unrolled by the assembler and
;  * the offset symbol updates emit no instructions, so carry chains are not
;  * disturbed.
;  */

; Stack frame of the 24-limb squaring routines:
;   [rsp+0   .. rsp+191]  24-limb scratch value (tmp)
;   [rsp+192]             saved result pointer r
;   [rsp+200]             saved input pointer a
SP3072_K24_FRAME        EQU 208
SP3072_K24_SAVE_R       EQU 192
SP3072_K24_SAVE_A       EQU 200

; One 64-bit limb of a multi-word chain:
;   [dstp+offs] = [lhsp+offs] opc [rhsp+offs]
; opc is sub for the lowest limb and sbb for the rest. mov does not modify the
; flags, so the borrow produced by opc is still in CF for the next limb.
SP3072_K24_LIMB MACRO opc, scratch, dstp, lhsp, rhsp, offs
        mov     scratch, QWORD PTR [lhsp+offs]
        opc     scratch, QWORD PTR [rhsp+offs]
        mov     QWORD PTR [dstp+offs], scratch
ENDM

; One limb of the conditional negate of tmp (addressed through r10):
;   [r10+offs] = ([r10+offs] XOR r9) + r11, then r11 = carry out (0 or 1).
; r11 must hold 0 or 1 on entry so that writing r11b updates the whole value.
SP3072_K24_NEG_LIMB MACRO scratch, offs
        mov     scratch, QWORD PTR [r10+offs]
        xor     scratch, r9
        add     scratch, r11
        setc    r11b
        mov     QWORD PTR [r10+offs], scratch
ENDM

; In-place 24-limb subtraction, addressed around the middle of each operand:
;   [dstp-96 .. dstp+95] -= [srcp-96 .. srcp+95]
; The borrow out of the top limb is left in CF. Scratch registers alternate
; r8, rax, r8, ... so the top limb ends up in rax.
SP3072_K24_SUB24 MACRO dstp, srcp
        SP3072_K24_LIMB sub, r8,  dstp, dstp, srcp, -96
        SP3072_K24_LIMB sbb, rax, dstp, dstp, srcp, -88
SP3072_K24_OFFS = -80
    REPEAT 11
        SP3072_K24_LIMB sbb, r8,  dstp, dstp, srcp, SP3072_K24_OFFS
        SP3072_K24_LIMB sbb, rax, dstp, dstp, srcp, SP3072_K24_OFFS+8
SP3072_K24_OFFS = SP3072_K24_OFFS + 16
    ENDM
ENDM

; Complete body (prologue through ret) of a 24-limb Karatsuba squaring
; routine. sqr12fn names the 12-limb squaring procedure to call; it is invoked
; with rcx = result pointer and rdx = input pointer.
;
; On entry: rcx = r, rdx = a.
; Steps:
;   1. tmp[0..11] = a[0..11] - a[12..23]. r9 becomes 0, or all ones when the
;      subtraction borrowed, and tmp[0..11] is then conditionally negated
;      with that mask.
;   2. sqr12fn(tmp, tmp), sqr12fn(r + 192, a + 96), sqr12fn(r, a).
;   3. tmp[0..23] -= r[24..47], then tmp[0..23] -= r[0..23]. Each final borrow
;      is subtracted from r9, which restarts at 0.
;   4. r9 = -r9, then r[12..35] -= tmp[0..23]. The final borrow is subtracted
;      from r9.
;   5. r9 is added into r[36..47] with carry propagation.
; rcx and rdx are reloaded with r and a before returning.
; Clobbers rax, r8, r9, r10, r11 and the flags, plus whatever sqr12fn clobbers.
;
; NOTE(review): rsp is not re-aligned and no shadow space is reserved before
; the calls. This presumably relies on sqr12fn being an assembly routine of
; this file that needs neither - confirm before calling anything else here.
SP3072_K24_BODY MACRO sqr12fn
        sub     rsp, SP3072_K24_FRAME
        mov     QWORD PTR [rsp+SP3072_K24_SAVE_R], rcx
        mov     QWORD PTR [rsp+SP3072_K24_SAVE_A], rdx

        ;; Step 1a: tmp[0..11] = a[0..11] - a[12..23], borrow mask in r9
        mov     r9, 0
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+96]
        SP3072_K24_LIMB sub, rax, r10, rdx, r11, 0
        SP3072_K24_LIMB sbb, r8,  r10, rdx, r11, 8
SP3072_K24_OFFS = 16
    REPEAT 5
        SP3072_K24_LIMB sbb, rax, r10, rdx, r11, SP3072_K24_OFFS
        SP3072_K24_LIMB sbb, r8,  r10, rdx, r11, SP3072_K24_OFFS+8
SP3072_K24_OFFS = SP3072_K24_OFFS + 16
    ENDM
        sbb     r9, 0

        ;; Step 1b: Cond Negate. Lowest limb is (limb XOR r9) - r9 and r11
        ;; is turned into the carry (0 or 1) to add into the next limb.
        mov     r11, r9
        neg     r11
        mov     rax, QWORD PTR [r10]
        xor     rax, r9
        sub     rax, r9
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
SP3072_K24_OFFS = 8
    REPEAT 5
        SP3072_K24_NEG_LIMB r8,  SP3072_K24_OFFS
        SP3072_K24_NEG_LIMB rax, SP3072_K24_OFFS+8
SP3072_K24_OFFS = SP3072_K24_OFFS + 16
    ENDM
        ;; Top limb: the carry out is not needed any more.
        mov     r8, QWORD PTR [r10+88]
        xor     r8, r9
        add     r8, r11
        mov     QWORD PTR [r10+88], r8

        ;; Step 2: the three half-size squarings
        mov     rdx, r10
        mov     rcx, rsp
        call    sqr12fn
        mov     rdx, QWORD PTR [rsp+SP3072_K24_SAVE_A]
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        add     rdx, 96
        add     rcx, 192
        call    sqr12fn
        mov     rdx, QWORD PTR [rsp+SP3072_K24_SAVE_A]
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        call    sqr12fn
    IFDEF _WIN64
        ;; Both values are loaded again before they are next read.
        mov     rdx, QWORD PTR [rsp+SP3072_K24_SAVE_A]
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
    ENDIF

        ;; Step 3: tmp -= r[24..47], tmp -= r[0..23], borrows go into r9
        mov     rdx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        lea     r10, QWORD PTR [rsp+96]
        add     rdx, 288
        mov     r9, 0
        SP3072_K24_SUB24 r10, rdx
        sbb     r9, 0
        sub     rdx, 192
        SP3072_K24_SUB24 r10, rdx
        sbb     r9, 0

        ;; Step 4: r[12..35] -= tmp
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        neg     r9
        add     rcx, 192
        SP3072_K24_SUB24 rcx, r10
        sbb     r9, 0

        ;; Step 5: Add in word. r[36..47] += r9
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        add     rcx, 288
        mov     r8, QWORD PTR [rcx]
        add     r8, r9
        mov     QWORD PTR [rcx], r8
SP3072_K24_OFFS = 8
    REPEAT 5
        mov     rax, QWORD PTR [rcx+SP3072_K24_OFFS]
        adc     rax, 0
        mov     QWORD PTR [rcx+SP3072_K24_OFFS], rax
        mov     r8, QWORD PTR [rcx+SP3072_K24_OFFS+8]
        adc     r8, 0
        mov     QWORD PTR [rcx+SP3072_K24_OFFS+8], r8
SP3072_K24_OFFS = SP3072_K24_OFFS + 16
    ENDM
        mov     rax, QWORD PTR [rcx+88]
        adc     rax, 0
        mov     QWORD PTR [rcx+88], rax

        mov     rdx, QWORD PTR [rsp+SP3072_K24_SAVE_A]
        mov     rcx, QWORD PTR [rsp+SP3072_K24_SAVE_R]
        add     rsp, SP3072_K24_FRAME
        ret
ENDM

; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  * The half-size squarings are done by sp_3072_sqr_12.
;  *
;  * r  A single precision integer. Passed in rcx; limbs r[0..47] are written.
;  * a  A single precision integer. Passed in rdx; limbs a[0..23] are read.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_24 PROC
        SP3072_K24_BODY sp_3072_sqr_12
sp_3072_sqr_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  * The half-size squarings are done by sp_3072_sqr_avx2_12.
;  *
;  * r  A single precision integer. Passed in rcx; limbs r[0..47] are written.
;  * a  A single precision integer. Passed in rdx; limbs a[0..23] are read.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_24 PROC
        SP3072_K24_BODY sp_3072_sqr_avx2_12
sp_3072_sqr_avx2_24 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_48 PROC
        sub     rsp, 400
        mov     QWORD PTR [rsp+384], rcx
        mov     QWORD PTR [rsp+392], rdx
        mov     r9, 0
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+192]
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
        mov     r8, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10], rax
        sbb     r8, QWORD PTR [r11+8]
        mov     rax, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+8], r8
        sbb     rax, QWORD PTR [r11+16]
        mov     r8, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+16], rax
        sbb     r8, QWORD PTR [r11+24]
        mov     rax, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+24], r8
        sbb     rax, QWORD PTR [r11+32]
        mov     r8, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+32], rax
        sbb     r8, QWORD PTR [r11+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+40], r8
        sbb     rax, QWORD PTR [r11+48]
        mov     r8, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+48], rax
        sbb     r8, QWORD PTR [r11+56]
        mov     rax, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+56], r8
        sbb     rax, QWORD PTR [r11+64]
        mov     r8, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+64], rax
        sbb     r8, QWORD PTR [r11+72]
        mov     rax, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+72], r8
        sbb     rax, QWORD PTR [r11+80]
        mov     r8, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+80], rax
        sbb     r8, QWORD PTR [r11+88]
        mov     rax, QWORD PTR [rdx+96]
        mov     QWORD PTR [r10+88], r8
        sbb     rax, QWORD PTR [r11+96]
        mov     r8, QWORD PTR [rdx+104]
        mov     QWORD PTR [r10+96], rax
        sbb     r8, QWORD PTR [r11+104]
        mov     rax, QWORD PTR
[rdx+112] mov QWORD PTR [r10+104], r8 sbb rax, QWORD PTR [r11+112] mov r8, QWORD PTR [rdx+120] mov QWORD PTR [r10+112], rax sbb r8, QWORD PTR [r11+120] mov rax, QWORD PTR [rdx+128] mov QWORD PTR [r10+120], r8 sbb rax, QWORD PTR [r11+128] mov r8, QWORD PTR [rdx+136] mov QWORD PTR [r10+128], rax sbb r8, QWORD PTR [r11+136] mov rax, QWORD PTR [rdx+144] mov QWORD PTR [r10+136], r8 sbb rax, QWORD PTR [r11+144] mov r8, QWORD PTR [rdx+152] mov QWORD PTR [r10+144], rax sbb r8, QWORD PTR [r11+152] mov rax, QWORD PTR [rdx+160] mov QWORD PTR [r10+152], r8 sbb rax, QWORD PTR [r11+160] mov r8, QWORD PTR [rdx+168] mov QWORD PTR [r10+160], rax sbb r8, QWORD PTR [r11+168] mov rax, QWORD PTR [rdx+176] mov QWORD PTR [r10+168], r8 sbb rax, QWORD PTR [r11+176] mov r8, QWORD PTR [rdx+184] mov QWORD PTR [r10+176], rax sbb r8, QWORD PTR [r11+184] mov QWORD PTR [r10+184], r8 sbb r9, 0 ; Cond Negate mov rax, QWORD PTR [r10] mov r11, r9 xor rax, r9 neg r11 sub rax, r9 mov r8, QWORD PTR [r10+8] sbb r11, 0 mov QWORD PTR [r10], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+16] setc r11b mov QWORD PTR [r10+8], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+24] setc r11b mov QWORD PTR [r10+16], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+32] setc r11b mov QWORD PTR [r10+24], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+40] setc r11b mov QWORD PTR [r10+32], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+48] setc r11b mov QWORD PTR [r10+40], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+56] setc r11b mov QWORD PTR [r10+48], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+64] setc r11b mov QWORD PTR [r10+56], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+72] setc r11b mov QWORD PTR [r10+64], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+80] setc r11b mov QWORD PTR [r10+72], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+88] setc r11b mov QWORD PTR [r10+80], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+96] setc r11b mov QWORD PTR [r10+88], r8 xor 
rax, r9 add rax, r11 mov r8, QWORD PTR [r10+104] setc r11b mov QWORD PTR [r10+96], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+112] setc r11b mov QWORD PTR [r10+104], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+120] setc r11b mov QWORD PTR [r10+112], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+128] setc r11b mov QWORD PTR [r10+120], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+136] setc r11b mov QWORD PTR [r10+128], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+144] setc r11b mov QWORD PTR [r10+136], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+152] setc r11b mov QWORD PTR [r10+144], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+160] setc r11b mov QWORD PTR [r10+152], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+168] setc r11b mov QWORD PTR [r10+160], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+176] setc r11b mov QWORD PTR [r10+168], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+184] setc r11b mov QWORD PTR [r10+176], rax xor r8, r9 add r8, r11 mov QWORD PTR [r10+184], r8 mov rdx, r10 mov rcx, rsp call sp_3072_sqr_24 mov rdx, QWORD PTR [rsp+392] mov rcx, QWORD PTR [rsp+384] add rdx, 192 add rcx, 384 call sp_3072_sqr_24 mov rdx, QWORD PTR [rsp+392] mov rcx, QWORD PTR [rsp+384] call sp_3072_sqr_24 IFDEF _WIN64 mov rdx, QWORD PTR [rsp+392] mov rcx, QWORD PTR [rsp+384] ENDIF mov rdx, QWORD PTR [rsp+384] lea r10, QWORD PTR [rsp+192] add rdx, 576 mov r9, 0 mov r8, QWORD PTR [r10+-192] sub r8, QWORD PTR [rdx+-192] mov rax, QWORD PTR [r10+-184] mov QWORD PTR [r10+-192], r8 sbb rax, QWORD PTR [rdx+-184] mov r8, QWORD PTR [r10+-176] mov QWORD PTR [r10+-184], rax sbb r8, QWORD PTR [rdx+-176] mov rax, QWORD PTR [r10+-168] mov QWORD PTR [r10+-176], r8 sbb rax, QWORD PTR [rdx+-168] mov r8, QWORD PTR [r10+-160] mov QWORD PTR [r10+-168], rax sbb r8, QWORD PTR [rdx+-160] mov rax, QWORD PTR [r10+-152] mov QWORD PTR [r10+-160], r8 sbb rax, QWORD PTR [rdx+-152] mov r8, QWORD PTR [r10+-144] mov QWORD PTR [r10+-152], rax sbb 
r8, QWORD PTR [rdx+-144] mov rax, QWORD PTR [r10+-136] mov QWORD PTR [r10+-144], r8 sbb rax, QWORD PTR [rdx+-136] mov r8, QWORD PTR [r10+-128] mov QWORD PTR [r10+-136], rax sbb r8, QWORD PTR [rdx+-128] mov rax, QWORD PTR [r10+-120] mov QWORD PTR [r10+-128], r8 sbb rax, QWORD PTR [rdx+-120] mov r8, QWORD PTR [r10+-112] mov QWORD PTR [r10+-120], rax sbb r8, QWORD PTR [rdx+-112] mov rax, QWORD PTR [r10+-104] mov QWORD PTR [r10+-112], r8 sbb rax, QWORD PTR [rdx+-104] mov r8, QWORD PTR [r10+-96] mov QWORD PTR [r10+-104], rax sbb r8, QWORD PTR [rdx+-96] mov rax, QWORD PTR [r10+-88] mov QWORD PTR [r10+-96], r8 sbb rax, QWORD PTR [rdx+-88] mov r8, QWORD PTR [r10+-80] mov QWORD PTR [r10+-88], rax sbb r8, QWORD PTR [rdx+-80] mov rax, QWORD PTR [r10+-72] mov QWORD PTR [r10+-80], r8 sbb rax, QWORD PTR [rdx+-72] mov r8, QWORD PTR [r10+-64] mov QWORD PTR [r10+-72], rax sbb r8, QWORD PTR [rdx+-64] mov rax, QWORD PTR [r10+-56] mov QWORD PTR [r10+-64], r8 sbb rax, QWORD PTR [rdx+-56] mov r8, QWORD PTR [r10+-48] mov QWORD PTR [r10+-56], rax sbb r8, QWORD PTR [rdx+-48] mov rax, QWORD PTR [r10+-40] mov QWORD PTR [r10+-48], r8 sbb rax, QWORD PTR [rdx+-40] mov r8, QWORD PTR [r10+-32] mov QWORD PTR [r10+-40], rax sbb r8, QWORD PTR [rdx+-32] mov rax, QWORD PTR [r10+-24] mov QWORD PTR [r10+-32], r8 sbb rax, QWORD PTR [rdx+-24] mov r8, QWORD PTR [r10+-16] mov QWORD PTR [r10+-24], rax sbb r8, QWORD PTR [rdx+-16] mov rax, QWORD PTR [r10+-8] mov QWORD PTR [r10+-16], r8 sbb rax, QWORD PTR [rdx+-8] mov r8, QWORD PTR [r10] mov QWORD PTR [r10+-8], rax sbb r8, QWORD PTR [rdx] mov rax, QWORD PTR [r10+8] mov QWORD PTR [r10], r8 sbb rax, QWORD PTR [rdx+8] mov r8, QWORD PTR [r10+16] mov QWORD PTR [r10+8], rax sbb r8, QWORD PTR [rdx+16] mov rax, QWORD PTR [r10+24] mov QWORD PTR [r10+16], r8 sbb rax, QWORD PTR [rdx+24] mov r8, QWORD PTR [r10+32] mov QWORD PTR [r10+24], rax sbb r8, QWORD PTR [rdx+32] mov rax, QWORD PTR [r10+40] mov QWORD PTR [r10+32], r8 sbb rax, QWORD PTR [rdx+40] mov r8, QWORD PTR 
[r10+48] mov QWORD PTR [r10+40], rax sbb r8, QWORD PTR [rdx+48] mov rax, QWORD PTR [r10+56] mov QWORD PTR [r10+48], r8 sbb rax, QWORD PTR [rdx+56] mov r8, QWORD PTR [r10+64] mov QWORD PTR [r10+56], rax sbb r8, QWORD PTR [rdx+64] mov rax, QWORD PTR [r10+72] mov QWORD PTR [r10+64], r8 sbb rax, QWORD PTR [rdx+72] mov r8, QWORD PTR [r10+80] mov QWORD PTR [r10+72], rax sbb r8, QWORD PTR [rdx+80] mov rax, QWORD PTR [r10+88] mov QWORD PTR [r10+80], r8 sbb rax, QWORD PTR [rdx+88] mov r8, QWORD PTR [r10+96] mov QWORD PTR [r10+88], rax sbb r8, QWORD PTR [rdx+96] mov rax, QWORD PTR [r10+104] mov QWORD PTR [r10+96], r8 sbb rax, QWORD PTR [rdx+104] mov r8, QWORD PTR [r10+112] mov QWORD PTR [r10+104], rax sbb r8, QWORD PTR [rdx+112] mov rax, QWORD PTR [r10+120] mov QWORD PTR [r10+112], r8 sbb rax, QWORD PTR [rdx+120] mov r8, QWORD PTR [r10+128] mov QWORD PTR [r10+120], rax sbb r8, QWORD PTR [rdx+128] mov rax, QWORD PTR [r10+136] mov QWORD PTR [r10+128], r8 sbb rax, QWORD PTR [rdx+136] mov r8, QWORD PTR [r10+144] mov QWORD PTR [r10+136], rax sbb r8, QWORD PTR [rdx+144] mov rax, QWORD PTR [r10+152] mov QWORD PTR [r10+144], r8 sbb rax, QWORD PTR [rdx+152] mov r8, QWORD PTR [r10+160] mov QWORD PTR [r10+152], rax sbb r8, QWORD PTR [rdx+160] mov rax, QWORD PTR [r10+168] mov QWORD PTR [r10+160], r8 sbb rax, QWORD PTR [rdx+168] mov r8, QWORD PTR [r10+176] mov QWORD PTR [r10+168], rax sbb r8, QWORD PTR [rdx+176] mov rax, QWORD PTR [r10+184] mov QWORD PTR [r10+176], r8 sbb rax, QWORD PTR [rdx+184] mov QWORD PTR [r10+184], rax sbb r9, 0 sub rdx, 384 mov r8, QWORD PTR [r10+-192] sub r8, QWORD PTR [rdx+-192] mov rax, QWORD PTR [r10+-184] mov QWORD PTR [r10+-192], r8 sbb rax, QWORD PTR [rdx+-184] mov r8, QWORD PTR [r10+-176] mov QWORD PTR [r10+-184], rax sbb r8, QWORD PTR [rdx+-176] mov rax, QWORD PTR [r10+-168] mov QWORD PTR [r10+-176], r8 sbb rax, QWORD PTR [rdx+-168] mov r8, QWORD PTR [r10+-160] mov QWORD PTR [r10+-168], rax sbb r8, QWORD PTR [rdx+-160] mov rax, QWORD PTR [r10+-152] mov 
QWORD PTR [r10+-160], r8 sbb rax, QWORD PTR [rdx+-152] mov r8, QWORD PTR [r10+-144] mov QWORD PTR [r10+-152], rax sbb r8, QWORD PTR [rdx+-144] mov rax, QWORD PTR [r10+-136] mov QWORD PTR [r10+-144], r8 sbb rax, QWORD PTR [rdx+-136] mov r8, QWORD PTR [r10+-128] mov QWORD PTR [r10+-136], rax sbb r8, QWORD PTR [rdx+-128] mov rax, QWORD PTR [r10+-120] mov QWORD PTR [r10+-128], r8 sbb rax, QWORD PTR [rdx+-120] mov r8, QWORD PTR [r10+-112] mov QWORD PTR [r10+-120], rax sbb r8, QWORD PTR [rdx+-112] mov rax, QWORD PTR [r10+-104] mov QWORD PTR [r10+-112], r8 sbb rax, QWORD PTR [rdx+-104] mov r8, QWORD PTR [r10+-96] mov QWORD PTR [r10+-104], rax sbb r8, QWORD PTR [rdx+-96] mov rax, QWORD PTR [r10+-88] mov QWORD PTR [r10+-96], r8 sbb rax, QWORD PTR [rdx+-88] mov r8, QWORD PTR [r10+-80] mov QWORD PTR [r10+-88], rax sbb r8, QWORD PTR [rdx+-80] mov rax, QWORD PTR [r10+-72] mov QWORD PTR [r10+-80], r8 sbb rax, QWORD PTR [rdx+-72] mov r8, QWORD PTR [r10+-64] mov QWORD PTR [r10+-72], rax sbb r8, QWORD PTR [rdx+-64] mov rax, QWORD PTR [r10+-56] mov QWORD PTR [r10+-64], r8 sbb rax, QWORD PTR [rdx+-56] mov r8, QWORD PTR [r10+-48] mov QWORD PTR [r10+-56], rax sbb r8, QWORD PTR [rdx+-48] mov rax, QWORD PTR [r10+-40] mov QWORD PTR [r10+-48], r8 sbb rax, QWORD PTR [rdx+-40] mov r8, QWORD PTR [r10+-32] mov QWORD PTR [r10+-40], rax sbb r8, QWORD PTR [rdx+-32] mov rax, QWORD PTR [r10+-24] mov QWORD PTR [r10+-32], r8 sbb rax, QWORD PTR [rdx+-24] mov r8, QWORD PTR [r10+-16] mov QWORD PTR [r10+-24], rax sbb r8, QWORD PTR [rdx+-16] mov rax, QWORD PTR [r10+-8] mov QWORD PTR [r10+-16], r8 sbb rax, QWORD PTR [rdx+-8] mov r8, QWORD PTR [r10] mov QWORD PTR [r10+-8], rax sbb r8, QWORD PTR [rdx] mov rax, QWORD PTR [r10+8] mov QWORD PTR [r10], r8 sbb rax, QWORD PTR [rdx+8] mov r8, QWORD PTR [r10+16] mov QWORD PTR [r10+8], rax sbb r8, QWORD PTR [rdx+16] mov rax, QWORD PTR [r10+24] mov QWORD PTR [r10+16], r8 sbb rax, QWORD PTR [rdx+24] mov r8, QWORD PTR [r10+32] mov QWORD PTR [r10+24], rax sbb r8, QWORD 
PTR [rdx+32] mov rax, QWORD PTR [r10+40] mov QWORD PTR [r10+32], r8 sbb rax, QWORD PTR [rdx+40] mov r8, QWORD PTR [r10+48] mov QWORD PTR [r10+40], rax sbb r8, QWORD PTR [rdx+48] mov rax, QWORD PTR [r10+56] mov QWORD PTR [r10+48], r8 sbb rax, QWORD PTR [rdx+56] mov r8, QWORD PTR [r10+64] mov QWORD PTR [r10+56], rax sbb r8, QWORD PTR [rdx+64] mov rax, QWORD PTR [r10+72] mov QWORD PTR [r10+64], r8 sbb rax, QWORD PTR [rdx+72] mov r8, QWORD PTR [r10+80] mov QWORD PTR [r10+72], rax sbb r8, QWORD PTR [rdx+80] mov rax, QWORD PTR [r10+88] mov QWORD PTR [r10+80], r8 sbb rax, QWORD PTR [rdx+88] mov r8, QWORD PTR [r10+96] mov QWORD PTR [r10+88], rax sbb r8, QWORD PTR [rdx+96] mov rax, QWORD PTR [r10+104] mov QWORD PTR [r10+96], r8 sbb rax, QWORD PTR [rdx+104] mov r8, QWORD PTR [r10+112] mov QWORD PTR [r10+104], rax sbb r8, QWORD PTR [rdx+112] mov rax, QWORD PTR [r10+120] mov QWORD PTR [r10+112], r8 sbb rax, QWORD PTR [rdx+120] mov r8, QWORD PTR [r10+128] mov QWORD PTR [r10+120], rax sbb r8, QWORD PTR [rdx+128] mov rax, QWORD PTR [r10+136] mov QWORD PTR [r10+128], r8 sbb rax, QWORD PTR [rdx+136] mov r8, QWORD PTR [r10+144] mov QWORD PTR [r10+136], rax sbb r8, QWORD PTR [rdx+144] mov rax, QWORD PTR [r10+152] mov QWORD PTR [r10+144], r8 sbb rax, QWORD PTR [rdx+152] mov r8, QWORD PTR [r10+160] mov QWORD PTR [r10+152], rax sbb r8, QWORD PTR [rdx+160] mov rax, QWORD PTR [r10+168] mov QWORD PTR [r10+160], r8 sbb rax, QWORD PTR [rdx+168] mov r8, QWORD PTR [r10+176] mov QWORD PTR [r10+168], rax sbb r8, QWORD PTR [rdx+176] mov rax, QWORD PTR [r10+184] mov QWORD PTR [r10+176], r8 sbb rax, QWORD PTR [rdx+184] mov QWORD PTR [r10+184], rax sbb r9, 0 mov rcx, QWORD PTR [rsp+384] neg r9 add rcx, 384 mov r8, QWORD PTR [rcx+-192] sub r8, QWORD PTR [r10+-192] mov rax, QWORD PTR [rcx+-184] mov QWORD PTR [rcx+-192], r8 sbb rax, QWORD PTR [r10+-184] mov r8, QWORD PTR [rcx+-176] mov QWORD PTR [rcx+-184], rax sbb r8, QWORD PTR [r10+-176] mov rax, QWORD PTR [rcx+-168] mov QWORD PTR [rcx+-176], r8 sbb 
rax, QWORD PTR [r10+-168] mov r8, QWORD PTR [rcx+-160] mov QWORD PTR [rcx+-168], rax sbb r8, QWORD PTR [r10+-160] mov rax, QWORD PTR [rcx+-152] mov QWORD PTR [rcx+-160], r8 sbb rax, QWORD PTR [r10+-152] mov r8, QWORD PTR [rcx+-144] mov QWORD PTR [rcx+-152], rax sbb r8, QWORD PTR [r10+-144] mov rax, QWORD PTR [rcx+-136] mov QWORD PTR [rcx+-144], r8 sbb rax, QWORD PTR [r10+-136] mov r8, QWORD PTR [rcx+-128] mov QWORD PTR [rcx+-136], rax sbb r8, QWORD PTR [r10+-128] mov rax, QWORD PTR [rcx+-120] mov QWORD PTR [rcx+-128], r8 sbb rax, QWORD PTR [r10+-120] mov r8, QWORD PTR [rcx+-112] mov QWORD PTR [rcx+-120], rax sbb r8, QWORD PTR [r10+-112] mov rax, QWORD PTR [rcx+-104] mov QWORD PTR [rcx+-112], r8 sbb rax, QWORD PTR [r10+-104] mov r8, QWORD PTR [rcx+-96] mov QWORD PTR [rcx+-104], rax sbb r8, QWORD PTR [r10+-96] mov rax, QWORD PTR [rcx+-88] mov QWORD PTR [rcx+-96], r8 sbb rax, QWORD PTR [r10+-88] mov r8, QWORD PTR [rcx+-80] mov QWORD PTR [rcx+-88], rax sbb r8, QWORD PTR [r10+-80] mov rax, QWORD PTR [rcx+-72] mov QWORD PTR [rcx+-80], r8 sbb rax, QWORD PTR [r10+-72] mov r8, QWORD PTR [rcx+-64] mov QWORD PTR [rcx+-72], rax sbb r8, QWORD PTR [r10+-64] mov rax, QWORD PTR [rcx+-56] mov QWORD PTR [rcx+-64], r8 sbb rax, QWORD PTR [r10+-56] mov r8, QWORD PTR [rcx+-48] mov QWORD PTR [rcx+-56], rax sbb r8, QWORD PTR [r10+-48] mov rax, QWORD PTR [rcx+-40] mov QWORD PTR [rcx+-48], r8 sbb rax, QWORD PTR [r10+-40] mov r8, QWORD PTR [rcx+-32] mov QWORD PTR [rcx+-40], rax sbb r8, QWORD PTR [r10+-32] mov rax, QWORD PTR [rcx+-24] mov QWORD PTR [rcx+-32], r8 sbb rax, QWORD PTR [r10+-24] mov r8, QWORD PTR [rcx+-16] mov QWORD PTR [rcx+-24], rax sbb r8, QWORD PTR [r10+-16] mov rax, QWORD PTR [rcx+-8] mov QWORD PTR [rcx+-16], r8 sbb rax, QWORD PTR [r10+-8] mov r8, QWORD PTR [rcx] mov QWORD PTR [rcx+-8], rax sbb r8, QWORD PTR [r10] mov rax, QWORD PTR [rcx+8] mov QWORD PTR [rcx], r8 sbb rax, QWORD PTR [r10+8] mov r8, QWORD PTR [rcx+16] mov QWORD PTR [rcx+8], rax sbb r8, QWORD PTR [r10+16] mov 
rax, QWORD PTR [rcx+24] mov QWORD PTR [rcx+16], r8 sbb rax, QWORD PTR [r10+24] mov r8, QWORD PTR [rcx+32] mov QWORD PTR [rcx+24], rax sbb r8, QWORD PTR [r10+32] mov rax, QWORD PTR [rcx+40] mov QWORD PTR [rcx+32], r8 sbb rax, QWORD PTR [r10+40] mov r8, QWORD PTR [rcx+48] mov QWORD PTR [rcx+40], rax sbb r8, QWORD PTR [r10+48] mov rax, QWORD PTR [rcx+56] mov QWORD PTR [rcx+48], r8 sbb rax, QWORD PTR [r10+56] mov r8, QWORD PTR [rcx+64] mov QWORD PTR [rcx+56], rax sbb r8, QWORD PTR [r10+64] mov rax, QWORD PTR [rcx+72] mov QWORD PTR [rcx+64], r8 sbb rax, QWORD PTR [r10+72] mov r8, QWORD PTR [rcx+80] mov QWORD PTR [rcx+72], rax sbb r8, QWORD PTR [r10+80] mov rax, QWORD PTR [rcx+88] mov QWORD PTR [rcx+80], r8 sbb rax, QWORD PTR [r10+88] mov r8, QWORD PTR [rcx+96] mov QWORD PTR [rcx+88], rax sbb r8, QWORD PTR [r10+96] mov rax, QWORD PTR [rcx+104] mov QWORD PTR [rcx+96], r8 sbb rax, QWORD PTR [r10+104] mov r8, QWORD PTR [rcx+112] mov QWORD PTR [rcx+104], rax sbb r8, QWORD PTR [r10+112] mov rax, QWORD PTR [rcx+120] mov QWORD PTR [rcx+112], r8 sbb rax, QWORD PTR [r10+120] mov r8, QWORD PTR [rcx+128] mov QWORD PTR [rcx+120], rax sbb r8, QWORD PTR [r10+128] mov rax, QWORD PTR [rcx+136] mov QWORD PTR [rcx+128], r8 sbb rax, QWORD PTR [r10+136] mov r8, QWORD PTR [rcx+144] mov QWORD PTR [rcx+136], rax sbb r8, QWORD PTR [r10+144] mov rax, QWORD PTR [rcx+152] mov QWORD PTR [rcx+144], r8 sbb rax, QWORD PTR [r10+152] mov r8, QWORD PTR [rcx+160] mov QWORD PTR [rcx+152], rax sbb r8, QWORD PTR [r10+160] mov rax, QWORD PTR [rcx+168] mov QWORD PTR [rcx+160], r8 sbb rax, QWORD PTR [r10+168] mov r8, QWORD PTR [rcx+176] mov QWORD PTR [rcx+168], rax sbb r8, QWORD PTR [r10+176] mov rax, QWORD PTR [rcx+184] mov QWORD PTR [rcx+176], r8 sbb rax, QWORD PTR [r10+184] mov QWORD PTR [rcx+184], rax sbb r9, 0 mov rcx, QWORD PTR [rsp+384] add rcx, 576 ; Add in word mov r8, QWORD PTR [rcx] add r8, r9 mov rax, QWORD PTR [rcx+8] mov QWORD PTR [rcx], r8 adc rax, 0 mov r8, QWORD PTR [rcx+16] mov QWORD PTR 
[rcx+8], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r8
        adc     rax, 0
        mov     QWORD PTR [rcx+184], rax
        mov     rdx, QWORD PTR [rsp+392]
        mov     rcx, QWORD PTR [rsp+384]
        add     rsp, 400
        ret
sp_3072_sqr_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *   The cross term is recovered from the three squares:
;  *   2*al*ah = al^2 + ah^2 - (al - ah)^2
;  *
;  * r  A single precision integer. Held in rcx on entry; the code below
;  *    writes 64-bit words at byte offsets 0..760 from it (96 words).
;  * a  A single precision integer. Held in rdx on entry; the code below
;  *    reads 64-bit words at byte offsets 0..376 from it (48 words).
;  *
;  * Stack frame (400 bytes):
;  *   [rsp+0 .. rsp+383]  scratch: |al - ah| (24 words), afterwards read
;  *                       back as the 48-word result of the first call
;  *   [rsp+384]           saved r (rcx)
;  *   [rsp+392]           saved a (rdx)
;  *
;  * Registers written: rax, r8, r9, r10, r11, flags, plus whatever
;  * sp_3072_sqr_avx2_24 (defined elsewhere in this file) changes.
;  * rcx and rdx are reloaded from the frame just before returning.
;  *
;  * NOTE(review): 400 is a multiple of 16, so rsp keeps its entry
;  * alignment at the three call sites, and no 32-byte shadow area is
;  * reserved below the scratch buffer (rcx = rsp is passed as the output
;  * pointer). This relies on sp_3072_sqr_avx2_24 needing neither - confirm
;  * against its definition.
;  */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_48 PROC
        sub     rsp, 400
        mov     QWORD PTR [rsp+384], rcx
        mov     QWORD PTR [rsp+392], rdx
        mov     r9, 0
        mov     r10, rsp
        lea     r11, QWORD PTR [rdx+192]
        ; scratch[0..23] = a[0..23] - a[24..47]  (al - ah).
        ; Loads and stores are interleaved with the sub/sbb chain; mov does
        ; not touch CF, so the borrow stays live from word to word.
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
        mov     r8, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10], rax
        sbb     r8, QWORD PTR [r11+8]
        mov     rax, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+8], r8
        sbb     rax, QWORD PTR [r11+16]
        mov     r8, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+16], rax
        sbb     r8, QWORD PTR [r11+24]
        mov     rax, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+24], r8
        sbb     rax, QWORD PTR [r11+32]
        mov     r8, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+32], rax
        sbb     r8, QWORD PTR [r11+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+40], r8
        sbb     rax, QWORD PTR [r11+48]
        mov     r8, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+48], rax
        sbb     r8, QWORD PTR [r11+56]
        mov     rax, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+56], r8
        sbb     rax, QWORD PTR [r11+64]
        mov     r8, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+64], rax
        sbb     r8, QWORD PTR [r11+72]
        mov     rax, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+72], r8
        sbb     rax, QWORD PTR [r11+80]
        mov     r8, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+80], rax
        sbb     r8, QWORD PTR [r11+88]
        mov     rax, QWORD PTR [rdx+96]
        mov     QWORD PTR [r10+88], r8
        sbb     rax, QWORD PTR [r11+96]
        mov     r8, QWORD PTR [rdx+104]
        mov     QWORD PTR [r10+96], rax
        sbb     r8, QWORD PTR [r11+104]
        mov     rax, QWORD PTR [rdx+112]
        mov     QWORD PTR [r10+104], r8
        sbb     rax, QWORD PTR [r11+112]
        mov     r8, QWORD PTR [rdx+120]
        mov     QWORD PTR [r10+112], rax
        sbb     r8, QWORD PTR [r11+120]
        mov     rax, QWORD PTR [rdx+128]
        mov     QWORD PTR [r10+120], r8
        sbb     rax, QWORD PTR [r11+128]
        mov     r8, QWORD PTR [rdx+136]
        mov     QWORD PTR [r10+128], rax
        sbb     r8, QWORD PTR [r11+136]
        mov     rax, QWORD PTR [rdx+144]
        mov     QWORD PTR [r10+136], r8
        sbb     rax, QWORD PTR [r11+144]
        mov     r8, QWORD PTR [rdx+152]
        mov     QWORD PTR [r10+144], rax
        sbb     r8, QWORD PTR [r11+152]
        mov     rax, QWORD PTR [rdx+160]
        mov     QWORD PTR [r10+152], r8
        sbb     rax, QWORD PTR [r11+160]
        mov     r8, QWORD PTR [rdx+168]
        mov     QWORD PTR [r10+160], rax
        sbb     r8, QWORD PTR [r11+168]
        mov     rax, QWORD PTR [rdx+176]
        mov     QWORD PTR [r10+168], r8
        sbb     rax, QWORD PTR [r11+176]
        mov     r8, QWORD PTR [rdx+184]
        mov     QWORD PTR [r10+176], rax
        sbb     r8, QWORD PTR [r11+184]
        mov     QWORD PTR [r10+184], r8
        ; r9 = 0 if the subtraction did not borrow, all ones if it did.
        sbb     r9, 0
        ; Cond Negate
        ; Branch-free negate of scratch[0..23] when r9 is all ones:
        ; each word is XORed with the mask and a +1 is rippled upwards.
        ; r11 carries the ripple between words (set from CF via setc).
        mov     rax, QWORD PTR [r10]
        mov     r11, r9
        xor     rax, r9
        neg     r11
        sub     rax, r9
        mov     r8, QWORD PTR [r10+8]
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+16]
        setc    r11b
        mov     QWORD PTR [r10+8], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+24]
        setc    r11b
        mov     QWORD PTR [r10+16], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+32]
        setc    r11b
        mov     QWORD PTR [r10+24], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+40]
        setc    r11b
        mov     QWORD PTR [r10+32], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+48]
        setc    r11b
        mov     QWORD PTR [r10+40], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+56]
        setc    r11b
        mov     QWORD PTR [r10+48], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+64]
        setc    r11b
        mov     QWORD PTR [r10+56], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+72]
        setc    r11b
        mov     QWORD PTR [r10+64], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+80]
        setc    r11b
        mov     QWORD PTR [r10+72], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+88]
        setc    r11b
        mov     QWORD PTR [r10+80], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+96]
        setc    r11b
        mov     QWORD PTR [r10+88], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+104]
        setc    r11b
        mov     QWORD PTR [r10+96], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+112]
        setc    r11b
        mov     QWORD PTR [r10+104], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+120]
        setc    r11b
        mov     QWORD PTR [r10+112], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+128]
        setc    r11b
        mov     QWORD PTR [r10+120], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+136]
        setc    r11b
        mov     QWORD PTR [r10+128], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+144]
        setc    r11b
        mov     QWORD PTR [r10+136], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+152]
        setc    r11b
        mov     QWORD PTR [r10+144], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+160]
        setc    r11b
        mov     QWORD PTR [r10+152], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+168]
        setc    r11b
        mov     QWORD PTR [r10+160], rax
        xor     r8, r9
        add     r8, r11
        mov     rax, QWORD PTR [r10+176]
        setc    r11b
        mov     QWORD PTR [r10+168], r8
        xor     rax, r9
        add     rax, r11
        mov     r8, QWORD PTR [r10+184]
        setc    r11b
        mov     QWORD PTR [r10+176], rax
        xor     r8, r9
        add     r8, r11
        mov     QWORD PTR [r10+184], r8
        ; First square: input and output are both the scratch buffer at rsp.
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_3072_sqr_avx2_24
        ; Second square: input a + 192 (ah), output r + 384.
        mov     rdx, QWORD PTR [rsp+392]
        mov     rcx, QWORD PTR [rsp+384]
        add     rdx, 192
        add     rcx, 384
        call    sp_3072_sqr_avx2_24
        ; Third square: input a (al), output r.
        mov     rdx, QWORD PTR [rsp+392]
        mov     rcx, QWORD PTR [rsp+384]
        call    sp_3072_sqr_avx2_24
        ; NOTE(review): the reload inside this IFDEF has no effect on the
        ; code that follows - rdx is overwritten by the very next
        ; instruction and rcx is reloaded from [rsp+384] before it is read
        ; again.
IFDEF _WIN64
        mov     rdx, QWORD PTR [rsp+392]
        mov     rcx, QWORD PTR [rsp+384]
ENDIF
        ; r10 -> middle of the 48-word scratch, rdx -> middle of r[48..95];
        ; both are addressed with displacements -192..+184.
        mov     rdx, QWORD PTR [rsp+384]
        lea     r10, QWORD PTR [rsp+192]
        add     rdx, 576
        mov     r9, 0
        ; scratch[0..47] -= r[48..95]; the final borrow is folded into r9.
        mov     r8, QWORD PTR [r10+-192]
        sub     r8, QWORD PTR [rdx+-192]
        mov     rax, QWORD PTR [r10+-184]
        mov     QWORD PTR [r10+-192], r8
        sbb     rax, QWORD PTR [rdx+-184]
        mov     r8, QWORD PTR [r10+-176]
        mov     QWORD PTR [r10+-184], rax
        sbb     r8, QWORD PTR [rdx+-176]
        mov     rax, QWORD PTR [r10+-168]
        mov     QWORD PTR [r10+-176], r8
        sbb     rax, QWORD PTR [rdx+-168]
        mov     r8, QWORD PTR [r10+-160]
        mov     QWORD PTR [r10+-168], rax
        sbb     r8, QWORD PTR [rdx+-160]
        mov     rax, QWORD PTR [r10+-152]
        mov     QWORD PTR [r10+-160], r8
        sbb     rax, QWORD PTR [rdx+-152]
        mov     r8, QWORD PTR [r10+-144]
        mov     QWORD PTR [r10+-152], rax
        sbb     r8, QWORD PTR [rdx+-144]
        mov     rax, QWORD PTR [r10+-136]
        mov     QWORD PTR [r10+-144], r8
        sbb     rax, QWORD PTR [rdx+-136]
        mov     r8, QWORD PTR [r10+-128]
        mov     QWORD PTR [r10+-136], rax
        sbb     r8, QWORD PTR [rdx+-128]
        mov     rax, QWORD PTR [r10+-120]
        mov     QWORD PTR [r10+-128], r8
        sbb     rax, QWORD PTR [rdx+-120]
        mov     r8, QWORD PTR [r10+-112]
        mov     QWORD PTR [r10+-120], rax
        sbb     r8, QWORD PTR [rdx+-112]
        mov     rax, QWORD PTR [r10+-104]
        mov     QWORD PTR [r10+-112], r8
        sbb     rax, QWORD PTR [rdx+-104]
        mov     r8, QWORD PTR [r10+-96]
        mov     QWORD PTR [r10+-104], rax
        sbb     r8, QWORD PTR [rdx+-96]
        mov     rax, QWORD PTR [r10+-88]
        mov     QWORD PTR [r10+-96], r8
        sbb     rax, QWORD PTR [rdx+-88]
        mov     r8, QWORD PTR [r10+-80]
        mov     QWORD PTR [r10+-88], rax
        sbb     r8, QWORD PTR [rdx+-80]
        mov     rax, QWORD PTR [r10+-72]
        mov     QWORD PTR [r10+-80], r8
        sbb     rax, QWORD PTR [rdx+-72]
        mov     r8, QWORD PTR [r10+-64]
        mov     QWORD PTR [r10+-72], rax
        sbb     r8, QWORD PTR [rdx+-64]
        mov     rax, QWORD PTR [r10+-56]
        mov     QWORD PTR [r10+-64], r8
        sbb     rax, QWORD PTR [rdx+-56]
        mov     r8, QWORD PTR [r10+-48]
        mov     QWORD PTR [r10+-56], rax
        sbb     r8, QWORD PTR [rdx+-48]
        mov     rax, QWORD PTR [r10+-40]
        mov     QWORD PTR [r10+-48], r8
        sbb     rax, QWORD PTR [rdx+-40]
        mov     r8, QWORD PTR [r10+-32]
        mov     QWORD PTR [r10+-40], rax
        sbb     r8, QWORD PTR [rdx+-32]
        mov     rax, QWORD PTR [r10+-24]
        mov     QWORD PTR [r10+-32], r8
        sbb     rax, QWORD PTR [rdx+-24]
        mov     r8, QWORD PTR [r10+-16]
        mov     QWORD PTR [r10+-24], rax
        sbb     r8, QWORD PTR [rdx+-16]
        mov     rax, QWORD PTR [r10+-8]
        mov     QWORD PTR [r10+-16], r8
        sbb     rax, QWORD PTR [rdx+-8]
        mov     r8, QWORD PTR [r10]
        mov     QWORD PTR [r10+-8], rax
        sbb     r8, QWORD PTR [rdx]
        mov     rax, QWORD PTR [r10+8]
        mov     QWORD PTR [r10], r8
        sbb     rax, QWORD PTR [rdx+8]
        mov     r8, QWORD PTR [r10+16]
        mov     QWORD PTR [r10+8], rax
        sbb     r8, QWORD PTR [rdx+16]
        mov     rax, QWORD PTR [r10+24]
        mov     QWORD PTR [r10+16], r8
        sbb     rax, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [r10+32]
        mov     QWORD PTR [r10+24], rax
        sbb     r8, QWORD PTR [rdx+32]
        mov     rax, QWORD PTR [r10+40]
        mov     QWORD PTR [r10+32], r8
        sbb     rax, QWORD PTR [rdx+40]
        mov     r8, QWORD PTR [r10+48]
        mov     QWORD PTR [r10+40], rax
        sbb     r8, QWORD PTR [rdx+48]
        mov     rax, QWORD PTR [r10+56]
        mov     QWORD PTR [r10+48], r8
        sbb     rax, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [r10+64]
        mov     QWORD PTR [r10+56], rax
        sbb     r8, QWORD PTR [rdx+64]
        mov     rax, QWORD PTR [r10+72]
        mov     QWORD PTR [r10+64], r8
        sbb     rax, QWORD PTR [rdx+72]
        mov     r8, QWORD PTR [r10+80]
        mov     QWORD PTR [r10+72], rax
        sbb     r8, QWORD PTR [rdx+80]
        mov     rax, QWORD PTR [r10+88]
        mov     QWORD PTR [r10+80], r8
        sbb     rax, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [r10+96]
        mov     QWORD PTR [r10+88], rax
        sbb     r8, QWORD PTR [rdx+96]
        mov     rax, QWORD PTR [r10+104]
        mov     QWORD PTR [r10+96], r8
        sbb     rax, QWORD PTR [rdx+104]
        mov     r8, QWORD PTR [r10+112]
        mov     QWORD PTR [r10+104], rax
        sbb     r8, QWORD PTR [rdx+112]
        mov     rax, QWORD PTR [r10+120]
        mov     QWORD PTR [r10+112], r8
        sbb     rax, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [r10+128]
        mov     QWORD PTR [r10+120], rax
        sbb     r8, QWORD PTR [rdx+128]
        mov     rax, QWORD PTR [r10+136]
        mov     QWORD PTR [r10+128], r8
        sbb     rax, QWORD PTR [rdx+136]
        mov     r8, QWORD PTR [r10+144]
        mov     QWORD PTR [r10+136], rax
        sbb     r8, QWORD PTR [rdx+144]
        mov     rax, QWORD PTR [r10+152]
        mov     QWORD PTR [r10+144], r8
        sbb     rax, QWORD PTR [rdx+152]
        mov     r8, QWORD PTR [r10+160]
        mov     QWORD PTR [r10+152], rax
        sbb     r8, QWORD PTR [rdx+160]
        mov     rax, QWORD PTR [r10+168]
        mov     QWORD PTR [r10+160], r8
        sbb     rax, QWORD PTR [rdx+168]
        mov     r8, QWORD PTR [r10+176]
        mov     QWORD PTR [r10+168], rax
        sbb     r8, QWORD PTR [rdx+176]
        mov     rax, QWORD PTR [r10+184]
        mov     QWORD PTR [r10+176], r8
        sbb     rax, QWORD PTR [rdx+184]
        mov     QWORD PTR [r10+184], rax
        sbb     r9, 0
        ; Move rdx back to the middle of r[0..47].
        sub     rdx, 384
        ; scratch[0..47] -= r[0..47]; the final borrow is folded into r9.
        mov     r8, QWORD PTR [r10+-192]
        sub     r8, QWORD PTR [rdx+-192]
        mov     rax, QWORD PTR [r10+-184]
        mov     QWORD PTR [r10+-192], r8
        sbb     rax, QWORD PTR [rdx+-184]
        mov     r8, QWORD PTR [r10+-176]
        mov     QWORD PTR [r10+-184], rax
        sbb     r8, QWORD PTR [rdx+-176]
        mov     rax, QWORD PTR [r10+-168]
        mov     QWORD PTR [r10+-176], r8
        sbb     rax, QWORD PTR [rdx+-168]
        mov     r8, QWORD PTR [r10+-160]
        mov     QWORD PTR [r10+-168], rax
        sbb     r8, QWORD PTR [rdx+-160]
        mov     rax, QWORD PTR [r10+-152]
        mov     QWORD PTR [r10+-160], r8
        sbb     rax, QWORD PTR [rdx+-152]
        mov     r8, QWORD PTR [r10+-144]
        mov     QWORD PTR [r10+-152], rax
        sbb     r8, QWORD PTR [rdx+-144]
        mov     rax, QWORD PTR [r10+-136]
        mov     QWORD PTR [r10+-144], r8
        sbb     rax, QWORD PTR [rdx+-136]
        mov     r8, QWORD PTR [r10+-128]
        mov     QWORD PTR [r10+-136], rax
        sbb     r8, QWORD PTR [rdx+-128]
        mov     rax, QWORD PTR [r10+-120]
        mov     QWORD PTR [r10+-128], r8
        sbb     rax, QWORD PTR [rdx+-120]
        mov     r8, QWORD PTR [r10+-112]
        mov     QWORD PTR [r10+-120], rax
        sbb     r8, QWORD PTR [rdx+-112]
        mov     rax, QWORD PTR [r10+-104]
        mov     QWORD PTR [r10+-112], r8
        sbb     rax, QWORD PTR [rdx+-104]
        mov     r8, QWORD PTR [r10+-96]
        mov     QWORD PTR [r10+-104], rax
        sbb     r8, QWORD PTR [rdx+-96]
        mov     rax, QWORD PTR [r10+-88]
        mov     QWORD PTR [r10+-96], r8
        sbb     rax, QWORD PTR [rdx+-88]
        mov     r8, QWORD PTR [r10+-80]
        mov     QWORD PTR [r10+-88], rax
        sbb     r8, QWORD PTR [rdx+-80]
        mov     rax, QWORD PTR [r10+-72]
        mov     QWORD PTR [r10+-80], r8
        sbb     rax, QWORD PTR [rdx+-72]
        mov     r8, QWORD PTR [r10+-64]
        mov     QWORD PTR [r10+-72], rax
        sbb     r8, QWORD PTR [rdx+-64]
        mov     rax, QWORD PTR [r10+-56]
        mov     QWORD PTR [r10+-64], r8
        sbb     rax, QWORD PTR [rdx+-56]
        mov     r8, QWORD PTR [r10+-48]
        mov     QWORD PTR [r10+-56], rax
        sbb     r8, QWORD PTR [rdx+-48]
        mov     rax, QWORD PTR [r10+-40]
        mov     QWORD PTR [r10+-48], r8
        sbb     rax, QWORD PTR [rdx+-40]
        mov     r8, QWORD PTR [r10+-32]
        mov     QWORD PTR [r10+-40], rax
        sbb     r8, QWORD PTR [rdx+-32]
        mov     rax, QWORD PTR [r10+-24]
        mov     QWORD PTR [r10+-32], r8
        sbb     rax, QWORD PTR [rdx+-24]
        mov     r8, QWORD PTR [r10+-16]
        mov     QWORD PTR [r10+-24], rax
        sbb     r8, QWORD PTR [rdx+-16]
        mov     rax, QWORD PTR [r10+-8]
        mov     QWORD PTR [r10+-16], r8
        sbb     rax, QWORD PTR [rdx+-8]
        mov     r8, QWORD PTR [r10]
        mov     QWORD PTR [r10+-8], rax
        sbb     r8, QWORD PTR [rdx]
        mov     rax, QWORD PTR [r10+8]
        mov     QWORD PTR [r10], r8
        sbb     rax, QWORD PTR [rdx+8]
        mov     r8, QWORD PTR [r10+16]
        mov     QWORD PTR [r10+8], rax
        sbb     r8, QWORD PTR [rdx+16]
        mov     rax, QWORD PTR [r10+24]
        mov     QWORD PTR [r10+16], r8
        sbb     rax, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [r10+32]
        mov     QWORD PTR [r10+24], rax
        sbb     r8, QWORD PTR [rdx+32]
        mov     rax, QWORD PTR [r10+40]
        mov     QWORD PTR [r10+32], r8
        sbb     rax, QWORD PTR [rdx+40]
        mov     r8, QWORD PTR [r10+48]
        mov     QWORD PTR [r10+40], rax
        sbb     r8, QWORD PTR [rdx+48]
        mov     rax, QWORD PTR [r10+56]
        mov     QWORD PTR [r10+48], r8
        sbb     rax, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [r10+64]
        mov     QWORD PTR [r10+56], rax
        sbb     r8, QWORD PTR [rdx+64]
        mov     rax, QWORD PTR [r10+72]
        mov     QWORD PTR [r10+64], r8
        sbb     rax, QWORD PTR [rdx+72]
        mov     r8, QWORD PTR [r10+80]
        mov     QWORD PTR [r10+72], rax
        sbb     r8, QWORD PTR [rdx+80]
        mov     rax, QWORD PTR [r10+88]
        mov     QWORD PTR [r10+80], r8
        sbb     rax, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [r10+96]
        mov     QWORD PTR [r10+88], rax
        sbb     r8, QWORD PTR [rdx+96]
        mov     rax, QWORD PTR [r10+104]
        mov     QWORD PTR [r10+96], r8
        sbb     rax, QWORD PTR [rdx+104]
        mov     r8, QWORD PTR [r10+112]
        mov     QWORD PTR [r10+104], rax
        sbb     r8, QWORD PTR [rdx+112]
        mov     rax, QWORD PTR [r10+120]
        mov     QWORD PTR [r10+112], r8
        sbb     rax, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [r10+128]
        mov     QWORD PTR [r10+120], rax
        sbb     r8, QWORD PTR [rdx+128]
        mov     rax, QWORD PTR [r10+136]
        mov     QWORD PTR [r10+128], r8
        sbb     rax, QWORD PTR [rdx+136]
        mov     r8, QWORD PTR [r10+144]
        mov     QWORD PTR [r10+136], rax
        sbb     r8, QWORD PTR [rdx+144]
        mov     rax, QWORD PTR [r10+152]
        mov     QWORD PTR [r10+144], r8
        sbb     rax, QWORD PTR [rdx+152]
        mov     r8, QWORD PTR [r10+160]
        mov     QWORD PTR [r10+152], rax
        sbb     r8, QWORD PTR [rdx+160]
        mov     rax, QWORD PTR [r10+168]
        mov     QWORD PTR [r10+160], r8
        sbb     rax, QWORD PTR [rdx+168]
        mov     r8, QWORD PTR [r10+176]
        mov     QWORD PTR [r10+168], rax
        sbb     r8, QWORD PTR [rdx+176]
        mov     rax, QWORD PTR [r10+184]
        mov     QWORD PTR [r10+176], r8
        sbb     rax, QWORD PTR [rdx+184]
        mov     QWORD PTR [r10+184], rax
        sbb     r9, 0
        ; r9 held minus the number of borrows from the two passes above;
        ; negate it, then point rcx at the middle of r[24..71].
        mov     rcx, QWORD PTR [rsp+384]
        neg     r9
        add     rcx, 384
        ; r[24..71] -= scratch[0..47]; the final borrow is taken from r9.
        mov     r8, QWORD PTR [rcx+-192]
        sub     r8, QWORD PTR [r10+-192]
        mov     rax, QWORD PTR [rcx+-184]
        mov     QWORD PTR [rcx+-192], r8
        sbb     rax, QWORD PTR [r10+-184]
        mov     r8, QWORD PTR [rcx+-176]
        mov     QWORD PTR [rcx+-184], rax
        sbb     r8, QWORD PTR [r10+-176]
        mov     rax, QWORD PTR [rcx+-168]
        mov     QWORD PTR [rcx+-176], r8
        sbb     rax, QWORD PTR [r10+-168]
        mov     r8, QWORD PTR [rcx+-160]
        mov     QWORD PTR [rcx+-168], rax
        sbb     r8, QWORD PTR [r10+-160]
        mov     rax, QWORD PTR [rcx+-152]
        mov     QWORD PTR [rcx+-160], r8
        sbb     rax, QWORD PTR [r10+-152]
        mov     r8, QWORD PTR [rcx+-144]
        mov     QWORD PTR [rcx+-152], rax
        sbb     r8, QWORD PTR [r10+-144]
        mov     rax, QWORD PTR [rcx+-136]
        mov     QWORD PTR [rcx+-144], r8
        sbb     rax, QWORD PTR [r10+-136]
        mov     r8, QWORD PTR [rcx+-128]
        mov     QWORD PTR [rcx+-136], rax
        sbb     r8, QWORD PTR [r10+-128]
        mov     rax, QWORD PTR [rcx+-120]
        mov     QWORD PTR [rcx+-128], r8
        sbb     rax, QWORD PTR [r10+-120]
        mov     r8, QWORD PTR [rcx+-112]
        mov     QWORD PTR [rcx+-120], rax
        sbb     r8, QWORD PTR [r10+-112]
        mov     rax, QWORD PTR [rcx+-104]
        mov     QWORD PTR [rcx+-112], r8
        sbb     rax, QWORD PTR [r10+-104]
        mov     r8, QWORD PTR [rcx+-96]
        mov     QWORD PTR [rcx+-104], rax
        sbb     r8, QWORD PTR [r10+-96]
        mov     rax, QWORD PTR [rcx+-88]
        mov     QWORD PTR [rcx+-96], r8
        sbb     rax, QWORD PTR [r10+-88]
        mov     r8, QWORD PTR [rcx+-80]
        mov     QWORD PTR [rcx+-88], rax
        sbb     r8, QWORD PTR [r10+-80]
        mov     rax, QWORD PTR [rcx+-72]
        mov     QWORD PTR [rcx+-80], r8
        sbb     rax, QWORD PTR [r10+-72]
        mov     r8, QWORD PTR [rcx+-64]
        mov     QWORD PTR [rcx+-72], rax
        sbb     r8, QWORD PTR [r10+-64]
        mov     rax, QWORD PTR [rcx+-56]
        mov     QWORD PTR [rcx+-64], r8
        sbb     rax, QWORD PTR [r10+-56]
        mov     r8, QWORD PTR [rcx+-48]
        mov     QWORD PTR [rcx+-56], rax
        sbb     r8, QWORD PTR [r10+-48]
        mov     rax, QWORD PTR [rcx+-40]
        mov     QWORD PTR [rcx+-48], r8
        sbb     rax, QWORD PTR [r10+-40]
        mov     r8, QWORD PTR [rcx+-32]
        mov     QWORD PTR [rcx+-40], rax
        sbb     r8, QWORD PTR [r10+-32]
        mov     rax, QWORD PTR [rcx+-24]
        mov     QWORD PTR [rcx+-32], r8
        sbb     rax, QWORD PTR [r10+-24]
        mov     r8, QWORD PTR [rcx+-16]
        mov     QWORD PTR [rcx+-24], rax
        sbb     r8, QWORD PTR [r10+-16]
        mov     rax, QWORD PTR [rcx+-8]
        mov     QWORD PTR [rcx+-16], r8
        sbb     rax, QWORD PTR [r10+-8]
        mov     r8, QWORD PTR [rcx]
        mov     QWORD PTR [rcx+-8], rax
        sbb     r8, QWORD PTR [r10]
        mov     rax, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r8
        sbb     rax, QWORD PTR [r10+8]
        mov     r8, QWORD PTR [rcx+16]
        mov     QWORD PTR [rcx+8], rax
        sbb     r8, QWORD PTR [r10+16]
        mov     rax, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r8
        sbb     rax, QWORD PTR [r10+24]
        mov     r8, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], rax
        sbb     r8, QWORD PTR [r10+32]
        mov     rax, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r8
        sbb     rax, QWORD PTR [r10+40]
        mov     r8, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], rax
        sbb     r8, QWORD PTR [r10+48]
        mov     rax, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r8
        sbb     rax, QWORD PTR [r10+56]
        mov     r8, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], rax
        sbb     r8, QWORD PTR [r10+64]
        mov     rax, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r8
        sbb     rax, QWORD PTR [r10+72]
        mov     r8, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], rax
        sbb     r8, QWORD PTR [r10+80]
        mov     rax, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r8
        sbb     rax, QWORD PTR [r10+88]
        mov     r8, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], rax
        sbb     r8, QWORD PTR [r10+96]
        mov     rax, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r8
        sbb     rax, QWORD PTR [r10+104]
        mov     r8, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], rax
        sbb     r8, QWORD PTR [r10+112]
        mov     rax, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        sbb     rax, QWORD PTR [r10+120]
        mov     r8, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], rax
        sbb     r8, QWORD PTR [r10+128]
        mov     rax, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r8
        sbb     rax, QWORD PTR [r10+136]
        mov     r8, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], rax
        sbb     r8, QWORD PTR [r10+144]
        mov     rax, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r8
        sbb     rax, QWORD PTR [r10+152]
        mov     r8, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], rax
        sbb     r8, QWORD PTR [r10+160]
        mov     rax, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r8
        sbb     rax, QWORD PTR [r10+168]
        mov     r8, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], rax
        sbb     r8, QWORD PTR [r10+176]
        mov     rax, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r8
        sbb     rax, QWORD PTR [r10+184]
        mov     QWORD PTR [rcx+184], rax
        sbb     r9, 0
        ; rcx -> r[72]; the word in r9 is added there and the carry is
        ; rippled through r[95].
        mov     rcx, QWORD PTR [rsp+384]
        add     rcx, 576
        ; Add in word
        mov     r8, QWORD PTR [rcx]
        add     r8, r9
        mov     rax, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+16]
        mov     QWORD PTR [rcx+8], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r8
        adc     rax, 0
        mov     QWORD PTR [rcx+184], rax
        ; Restore the caller's rcx/rdx values and release the frame.
        mov     rdx, QWORD PTR [rsp+392]
        mov     rcx, QWORD PTR [rsp+384]
        add     rsp, 400
        ret
sp_3072_sqr_avx2_48 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision digit.
; */ _text SEGMENT READONLY PARA sp_3072_mul_d_48 PROC push r12 mov r9, rdx ; A[0] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9] mov r10, rax mov r11, rdx mov QWORD PTR [rcx], r10 ; A[1] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+8] add r11, rax mov QWORD PTR [rcx+8], r11 adc r12, rdx adc r10, 0 ; A[2] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+16] add r12, rax mov QWORD PTR [rcx+16], r12 adc r10, rdx adc r11, 0 ; A[3] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+24] add r10, rax mov QWORD PTR [rcx+24], r10 adc r11, rdx adc r12, 0 ; A[4] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+32] add r11, rax mov QWORD PTR [rcx+32], r11 adc r12, rdx adc r10, 0 ; A[5] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+40] add r12, rax mov QWORD PTR [rcx+40], r12 adc r10, rdx adc r11, 0 ; A[6] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+48] add r10, rax mov QWORD PTR [rcx+48], r10 adc r11, rdx adc r12, 0 ; A[7] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+56] add r11, rax mov QWORD PTR [rcx+56], r11 adc r12, rdx adc r10, 0 ; A[8] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+64] add r12, rax mov QWORD PTR [rcx+64], r12 adc r10, rdx adc r11, 0 ; A[9] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+72] add r10, rax mov QWORD PTR [rcx+72], r10 adc r11, rdx adc r12, 0 ; A[10] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+80] add r11, rax mov QWORD PTR [rcx+80], r11 adc r12, rdx adc r10, 0 ; A[11] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+88] add r12, rax mov QWORD PTR [rcx+88], r12 adc r10, rdx adc r11, 0 ; A[12] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+96] add r10, rax mov QWORD PTR [rcx+96], r10 adc r11, rdx adc r12, 0 ; A[13] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+104] add r11, rax mov QWORD PTR [rcx+104], r11 adc r12, rdx adc r10, 0 ; A[14] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+112] add r12, rax mov QWORD PTR [rcx+112], r12 adc r10, rdx adc r11, 0 ; A[15] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+120] add r10, rax mov QWORD PTR 
[rcx+120], r10 adc r11, rdx adc r12, 0 ; A[16] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+128] add r11, rax mov QWORD PTR [rcx+128], r11 adc r12, rdx adc r10, 0 ; A[17] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+136] add r12, rax mov QWORD PTR [rcx+136], r12 adc r10, rdx adc r11, 0 ; A[18] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+144] add r10, rax mov QWORD PTR [rcx+144], r10 adc r11, rdx adc r12, 0 ; A[19] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+152] add r11, rax mov QWORD PTR [rcx+152], r11 adc r12, rdx adc r10, 0 ; A[20] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+160] add r12, rax mov QWORD PTR [rcx+160], r12 adc r10, rdx adc r11, 0 ; A[21] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+168] add r10, rax mov QWORD PTR [rcx+168], r10 adc r11, rdx adc r12, 0 ; A[22] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+176] add r11, rax mov QWORD PTR [rcx+176], r11 adc r12, rdx adc r10, 0 ; A[23] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+184] add r12, rax mov QWORD PTR [rcx+184], r12 adc r10, rdx adc r11, 0 ; A[24] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+192] add r10, rax mov QWORD PTR [rcx+192], r10 adc r11, rdx adc r12, 0 ; A[25] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+200] add r11, rax mov QWORD PTR [rcx+200], r11 adc r12, rdx adc r10, 0 ; A[26] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+208] add r12, rax mov QWORD PTR [rcx+208], r12 adc r10, rdx adc r11, 0 ; A[27] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+216] add r10, rax mov QWORD PTR [rcx+216], r10 adc r11, rdx adc r12, 0 ; A[28] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+224] add r11, rax mov QWORD PTR [rcx+224], r11 adc r12, rdx adc r10, 0 ; A[29] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+232] add r12, rax mov QWORD PTR [rcx+232], r12 adc r10, rdx adc r11, 0 ; A[30] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+240] add r10, rax mov QWORD PTR [rcx+240], r10 adc r11, rdx adc r12, 0 ; A[31] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+248] add r11, 
rax mov QWORD PTR [rcx+248], r11 adc r12, rdx adc r10, 0 ; A[32] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+256] add r12, rax mov QWORD PTR [rcx+256], r12 adc r10, rdx adc r11, 0 ; A[33] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+264] add r10, rax mov QWORD PTR [rcx+264], r10 adc r11, rdx adc r12, 0 ; A[34] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+272] add r11, rax mov QWORD PTR [rcx+272], r11 adc r12, rdx adc r10, 0 ; A[35] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+280] add r12, rax mov QWORD PTR [rcx+280], r12 adc r10, rdx adc r11, 0 ; A[36] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+288] add r10, rax mov QWORD PTR [rcx+288], r10 adc r11, rdx adc r12, 0 ; A[37] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+296] add r11, rax mov QWORD PTR [rcx+296], r11 adc r12, rdx adc r10, 0 ; A[38] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+304] add r12, rax mov QWORD PTR [rcx+304], r12 adc r10, rdx adc r11, 0 ; A[39] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+312] add r10, rax mov QWORD PTR [rcx+312], r10 adc r11, rdx adc r12, 0 ; A[40] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+320] add r11, rax mov QWORD PTR [rcx+320], r11 adc r12, rdx adc r10, 0 ; A[41] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+328] add r12, rax mov QWORD PTR [rcx+328], r12 adc r10, rdx adc r11, 0 ; A[42] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+336] add r10, rax mov QWORD PTR [rcx+336], r10 adc r11, rdx adc r12, 0 ; A[43] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+344] add r11, rax mov QWORD PTR [rcx+344], r11 adc r12, rdx adc r10, 0 ; A[44] * B mov rax, r8 xor r11, r11 mul QWORD PTR [r9+352] add r12, rax mov QWORD PTR [rcx+352], r12 adc r10, rdx adc r11, 0 ; A[45] * B mov rax, r8 xor r12, r12 mul QWORD PTR [r9+360] add r10, rax mov QWORD PTR [rcx+360], r10 adc r11, rdx adc r12, 0 ; A[46] * B mov rax, r8 xor r10, r10 mul QWORD PTR [r9+368] add r11, rax mov QWORD PTR [rcx+368], r11 adc r12, rdx adc r10, 0 ; A[47] * B mov rax, r8 mul QWORD PTR [r9+376] add 
r12, rax
        adc     r10, rdx
        mov     QWORD PTR [rcx+376], r12
        mov     QWORD PTR [rcx+384], r10
        pop     r12
        ret
sp_3072_mul_d_48 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r  A single precision number representing conditional subtract result.
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns the final borrow in rax: 0 when there was no borrow, -1 otherwise.
; * Uses 192 bytes of stack as scratch for the masked copy of b.
; * Clobbers r8, r10, r11 and the flags.
; */
; One limb of the borrow chain: cur = a[k] - masked b[k] - borrow, then the
; previous limb's result (held in prev) is stored. mov does not touch CF, so
; the borrow survives from one sbb to the next.
cond_sub_24_limb MACRO cur, prev, disp
        mov     cur, QWORD PTR [rdx+disp]
        mov     r8, QWORD PTR [rsp+disp]
        sbb     cur, r8
        mov     QWORD PTR [rcx+disp-8], prev
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_sub_24 PROC
        sub     rsp, 192
        ; Pass 1: scratch[k] = b[k] AND m, two limbs per step (24 limbs).
cs24_disp = 0
        REPEAT  12
        mov     r10, QWORD PTR [r8+cs24_disp]
        mov     r11, QWORD PTR [r8+cs24_disp+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+cs24_disp], r10
        mov     QWORD PTR [rsp+cs24_disp+8], r11
cs24_disp = cs24_disp + 16
        ENDM
        ; Pass 2: r = a - scratch. r8 is free from here on (b was fully read
        ; in pass 1) and is reused to hold the masked limb.
        ; Limb 0 opens the chain with sub.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
        ; Limbs 1..22, alternating r11/r10 so each store trails by one limb.
cs24_disp = 8
        REPEAT  11
        cond_sub_24_limb r11, r10, cs24_disp
        cond_sub_24_limb r10, r11, cs24_disp+8
cs24_disp = cs24_disp + 16
        ENDM
        ; Limb 23, then flush the last two results.
        cond_sub_24_limb r11, r10, 184
        mov     QWORD PTR [rcx+184], r11
        ; rax = 0 - 0 - CF: 0 when no borrow, all ones when the chain borrowed.
        sbb     rax, rax
        add     rsp, 192
        ret
sp_3072_cond_sub_24 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * Inside the loop:
; *   r9  = m              r10 = remaining iterations (24 down to 1)
; *   r13 = mu             r15 = a[i], then the new a[i+1]
; *   rdi = a[i+1], then the new a[i+2] (neither is written back in the loop)
; *   r11/r12 = alternating carry word, r14 = scratch limb
; *   rsi = carry out of the top word, fed into the next iteration
; * rcx advances by one limb per iteration, so it ends at a + 192 bytes.
; */
; a[i+k] += m[k] * mu + carryIn for one middle limb; the new carry is left in
; carryOut. disp is the byte offset of limb k.
mont_reduce_24_limb MACRO carryOut, carryIn, disp
        mov     rax, r13
        xor     carryOut, carryOut
        mul     QWORD PTR [r9+disp]
        mov     r14, QWORD PTR [rcx+disp]
        add     r14, rax
        adc     carryOut, rdx
        add     r14, carryIn
        mov     QWORD PTR [rcx+disp], r14
        adc     carryOut, 0
ENDM
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_24 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 24
        mov     r10, 24
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_3072_mont_reduce_24_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu (only the carry in r12 is used afterwards)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu (result stays in r15 as the next a[i])
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu (result stays in rdi as the next a[i+1])
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+22]: odd limbs carry out in r11, even limbs in r12.
mr24_disp = 24
        REPEAT  10
        mont_reduce_24_limb r11, r12, mr24_disp
        mont_reduce_24_limb r12, r11, mr24_disp+8
mr24_disp = mr24_disp + 16
        ENDM
        ; a[i+23] += m[23] * mu, and propagate into a[i+24].
        ; rsi (carry from the previous iteration) is folded into the high
        ; word and then reloaded with this iteration's carry out; the
        ; "mov rsi, 0" keeps CF intact, which is why it is not an xor.
        mov     rax, r13
        mul     QWORD PTR [r9+184]
        mov     r14, QWORD PTR [rcx+184]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+184], r14
        adc     QWORD PTR [rcx+192], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_3072_mont_reduce_24_loop
        ; Write back the two limbs that were cached in registers.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Turn the final carry into the subtract mask (1 -> all ones).
        neg     rsi
        ; Arguments for sp_3072_cond_sub_24(r, a, b, m):
        ;   r = original a, a = a + 192 bytes (upper half), b = m, m = mask.
        ; r9 must be copied to r8 before it is overwritten with the mask. The
        ; former non-_WIN64 arm did these two moves in the opposite order and
        ; would have passed the mask as b; this file always defines _WIN64, so
        ; the single correct sequence is used unconditionally.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 192
        ; NOTE(review): no shadow space is reserved and rsp is not 16-byte
        ; aligned at this call. The callee above is a leaf that only uses its
        ; own 192-byte scratch with plain mov, so it does not rely on either.
        call    sp_3072_cond_sub_24
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mont_reduce_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing conditional subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
; Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; Returns the final borrow in rax (0 or -1). r10, r11 and r12 rotate as
; scratch; r12 is saved and restored.
;
; One limb of the borrow chain. pext with an all-ones selector returns the
; source unchanged and with a zero selector returns 0, so for m = -1 / 0 it
; acts as the mask step; it does not modify the flags, so the borrow carried
; in CF survives between sbb instructions.
;   bReg    receives the masked b limb
;   aReg    receives a[k] and then a[k] - b[k] - borrow
;   prevReg holds the previous limb's result, stored one limb late
cond_sub_avx2_24_limb MACRO bReg, aReg, prevReg, disp
        mov     bReg, QWORD PTR [r8+disp]
        mov     aReg, QWORD PTR [rdx+disp]
        pext    bReg, bReg, r9
        mov     QWORD PTR [rcx+disp-8], prevReg
        sbb     aReg, bReg
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_24 PROC
        push    r12
        ; Limb 0 opens the chain with sub.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Limbs 1..21: the three scratch registers rotate with period 3.
csa24_disp = 8
        REPEAT  7
        cond_sub_avx2_24_limb r12, r11, r10, csa24_disp
        cond_sub_avx2_24_limb r10, r12, r11, csa24_disp+8
        cond_sub_avx2_24_limb r11, r10, r12, csa24_disp+16
csa24_disp = csa24_disp + 24
        ENDM
        ; Limbs 22 and 23, then flush the last result.
        cond_sub_avx2_24_limb r12, r11, r10, 176
        cond_sub_avx2_24_limb r10, r12, r11, 184
        mov     QWORD PTR [rcx+184], r12
        ; rax = 0 when no borrow, all ones when the chain borrowed.
        sbb     rax, rax
        pop     r12
        ret
sp_3072_cond_sub_avx2_24 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; Registers on entry: rcx = r, rdx = a (copied to r9 because mul overwrites
; rdx), r8 = b. Writes 25 limbs: r[0..24]. r10, r11 and r12 rotate as a
; three-word accumulator; r12 is saved and restored.
;
; One middle limb: adds a[k] * b into the accumulator and stores r[k].
;   accLo   word that becomes r[k]
;   accHi   receives the high half of the product plus carry
;   accNext cleared here, collects the carry out of accHi
mul_d_24_limb MACRO accLo, accHi, accNext, disp
        mov     rax, r8
        xor     accNext, accNext
        mul     QWORD PTR [r9+disp]
        add     accLo, rax
        mov     QWORD PTR [rcx+disp], accLo
        adc     accHi, rdx
        adc     accNext, 0
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_24 PROC
        push    r12
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[21] * B, accumulator rotating with period 3.
md24_disp = 8
        REPEAT  7
        mul_d_24_limb r11, r12, r10, md24_disp
        mul_d_24_limb r12, r10, r11, md24_disp+8
        mul_d_24_limb r10, r11, r12, md24_disp+16
md24_disp = md24_disp + 24
        ENDM
        ; A[22] * B
        mul_d_24_limb r11, r12, r10, 176
        ; A[23] * B: last product, so both remaining words are stored.
        mov     rax, r8
        mul     QWORD PTR [r9+184]
        add     r12, rax
        adc     r10, rdx
        mov     QWORD PTR [rcx+184], r12
        mov     QWORD PTR [rcx+192], r10
        pop     r12
        ret
sp_3072_mul_d_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; Registers on entry: rcx = r, rdx = a (moved to rax because mulx takes its
; implicit multiplicand from rdx, which is loaded with b), r8 = b.
; Writes 25 limbs: r[0..24]. r13 is kept at zero; the xor that clears it also
; clears CF and OF, which start the adcx (CF) and adox (OF) chains.
; r11/r12 alternate as accumulator; r9/r10 receive each product.
;
; One middle limb: acc += low half (CF chain), nextAcc = high half (OF
; chain), then r[k] = acc.
mul_d_avx2_24_limb MACRO acc, nextAcc, disp
        mulx    r10, r9, QWORD PTR [rax+disp]
        mov     nextAcc, r13
        adcx    acc, r9
        adox    nextAcc, r10
        mov     QWORD PTR [rcx+disp], acc
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_avx2_24 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[22] * B, accumulator alternating r12 / r11.
mda24_disp = 8
        REPEAT  11
        mul_d_avx2_24_limb r12, r11, mda24_disp
        mul_d_avx2_24_limb r11, r12, mda24_disp+8
mda24_disp = mda24_disp + 16
        ENDM
        ; A[23] * B: fold the outstanding CF into the top word as well.
        mulx    r10, r9, QWORD PTR [rax+184]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+184], r12
        mov     QWORD PTR [rcx+192], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_24 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the quotient of the division.
; *
; * Registers on entry: rcx = d1, rdx = d0, r8 = div. Quotient is returned in
; * rax (the remainder is left in rdx by the div instruction). r9 is used as
; * scratch. The div instruction faults when div is zero or the quotient does
; * not fit in 64 bits, so the caller must pass d1 < div.
; */
_text SEGMENT READONLY PARA
div_3072_word_asm_24 PROC
        ; div wants the 128-bit numerator in rdx:rax, so d0 is parked in r9
        ; while rdx is reloaded with d1.
        mov     r9, rdx
        mov     rdx, rcx
        mov     rax, r9
        div     r8
        ret
div_3072_word_asm_24 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
; Registers on entry: rcx = a, rdx = b. Result in rax: -1, 0 or 1.
;   r8  = "still equal" mask: all ones until the first differing limb, then 0
;   r9  = constant 0, r10 = constant 1
;   rax = running result, seeded with -1
; Limbs are visited from most significant (offset 184) to least (offset 0).
; Every limb runs the same instructions with no branches; once r8 is 0 both
; operands are masked to 0, so later limbs cannot change rax.
_text SEGMENT READONLY PARA
sp_3072_cmp_24 PROC
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
cmp24_disp = 184
        REPEAT  24
        mov     r11, QWORD PTR [rcx+cmp24_disp]
        mov     r12, QWORD PTR [rdx+cmp24_disp]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        ; a limb above b limb -> 1; below -> r8 (still -1 here); differ -> mask off
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
cmp24_disp = cmp24_disp - 8
        ENDM
        ; All limbs equal: rax = -1 and r8 = -1, so the xor yields 0.
        ; Otherwise r8 = 0 and rax keeps its -1 / 1 verdict.
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_24 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_24 PROC
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        pxor    xmm13, xmm13
        pshufd  xmm11, xmm11, 0
        pshufd  xmm10, xmm10, 0
        ; START: 0-7
        pxor    xmm13, xmm13
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        pxor    xmm6, xmm6
pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 
pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR 
[r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] movdqu xmm12, xmm13 pcmpeqd 
xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por 
xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand 
xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 0-7 ; START: 8-15 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 
pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 64 movdqu xmm12, xmm13 pcmpeqd 
xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por 
xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] 
movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por 
xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] 
movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 8-15 ; START: 16-23 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD 
PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por 
xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] 
movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 
paddd xmm13, xmm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR 
[r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 
30 mov r9, QWORD PTR [rdx+240] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 ; END: 16-23 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_3072_get_from_table_24 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 3072 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. 
; */
; Register use in sp_3072_mont_reduce_avx2_24 (Microsoft x64 argument
; registers: rcx = a, rdx = m, r8 = mp):
;   r9   pointer into a, biased by +96 bytes, so inside the loop
;        [r9-64] is a[i+4] and [r9+96] is a[i+24]
;   r10  m (copied from rdx because MULX uses rdx as its implicit multiplicand)
;   r11  loop counter, counts down from 24
;   r14, r15, rdi, rsi
;        a[i+0..i+3], kept in registers and rotated down one word per
;        iteration; they are not written back to memory inside the loop
;   r12, r13
;        alternating scratch for a[i+5..i+24]
;   rbx  zero (re-zeroed each iteration; the XOR also clears CF and OF)
;   rbp  carry out of a[i+24], carried into the next iteration
; On exit a[0..23] holds the reduced value; a[24..47] is read but the words
; above a[24+23] are not touched.
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_24 PROC
        ; Save every non-volatile register this routine overwrites.
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        mov r9, rcx
        mov r10, rdx
        xor rbp, rbp
        ; i = 24
        mov r11, 24
        ; Preload a[0..3] into the register window.
        mov r14, QWORD PTR [r9]
        mov r15, QWORD PTR [r9+8]
        mov rdi, QWORD PTR [r9+16]
        mov rsi, QWORD PTR [r9+24]
        ; Bias the pointer so a[i+4]..a[i+24] are reachable as [r9-64]..[r9+96].
        add r9, 96
        ; NOTE(review): rbp was already zeroed above and is not written in
        ; between, so this second clear is redundant (harmless).
        xor rbp, rbp
L_3072_mont_reduce_avx2_24_loop:
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        ; rbx = 0; also clears CF and OF so the ADCX (CF) and ADOX (OF)
        ; carry chains below both start from zero.
        xor rbx, rbx
        ; Each step: MULX gives rcx:rax = m[j] * mu without touching flags,
        ; ADCX adds the low half into word j (CF chain), ADOX adds the high
        ; half into word j+1 (OF chain).
        ; a[i+0] += m[0] * mu
        ; r12 (the new a[i+0]) is never stored; only its carry-out is used.
        ; Presumably it is 0 mod 2^64 by the choice of mp - TODO confirm.
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        ; rsi is refilled from memory with a[i+4]; it becomes the top of the
        ; register window for the next iteration.
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9+-64]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+-56]
        adcx rsi, rax
        adox r13, rcx
        ; From here on each finished word is stored back to a[] immediately.
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+-48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-56], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9+-40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-48], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9+-32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-40], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+-24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-32], r12
        ; a[i+9] += m[9] * mu
        mulx rcx, rax, QWORD PTR [r10+72]
        mov r12, QWORD PTR [r9+-16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-24], r13
        ; a[i+10] += m[10] * mu
        mulx rcx, rax, QWORD PTR [r10+80]
        mov r13, QWORD PTR [r9+-8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-16], r12
        ; a[i+11] += m[11] * mu
        mulx rcx, rax, QWORD PTR [r10+88]
        mov r12, QWORD PTR [r9]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-8], r13
        ; a[i+12] += m[12] * mu
        mulx rcx, rax, QWORD PTR [r10+96]
        mov r13, QWORD PTR [r9+8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9], r12
        ; a[i+13] += m[13] * mu
        mulx rcx, rax, QWORD PTR [r10+104]
        mov r12, QWORD PTR [r9+16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+8], r13
        ; a[i+14] += m[14] * mu
        mulx rcx, rax, QWORD PTR [r10+112]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+15] += m[15] * mu
        mulx rcx, rax, QWORD PTR [r10+120]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+16] += m[16] * mu
        mulx rcx, rax, QWORD PTR [r10+128]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        ; a[i+17] += m[17] * mu
        mulx rcx, rax, QWORD PTR [r10+136]
        mov r12, QWORD PTR [r9+48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+40], r13
        ; a[i+18] += m[18] * mu
        mulx rcx, rax, QWORD PTR [r10+144]
        mov r13, QWORD PTR [r9+56]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+48], r12
        ; a[i+19] += m[19] * mu
        mulx rcx, rax, QWORD PTR [r10+152]
        mov r12, QWORD PTR [r9+64]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+56], r13
        ; a[i+20] += m[20] * mu
        mulx rcx, rax, QWORD PTR [r10+160]
        mov r13, QWORD PTR [r9+72]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+64], r12
        ; a[i+21] += m[21] * mu
        mulx rcx, rax, QWORD PTR [r10+168]
        mov r12, QWORD PTR [r9+80]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+72], r13
        ; a[i+22] += m[22] * mu
        mulx rcx, rax, QWORD PTR [r10+176]
        mov r13, QWORD PTR [r9+88]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+80], r12
        ; a[i+23] += m[23] * mu
        mulx rcx, rax, QWORD PTR [r10+184]
        mov r12, QWORD PTR [r9+96]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+88], r13
        ; a[i+24] += carry saved from the previous iteration (rbp) plus CF.
        adcx r12, rbp
        ; Rebuild rbp from scratch: rbp = 0 + OF (from the ADOX above)
        ; + CF (from the ADCX above). MOV does not alter either flag.
        mov rbp, rbx
        mov QWORD PTR [r9+96], r12
        adox rbp, rbx
        adcx rbp, rbx
        ; a += 1
        add r9, 8
        ; i -= 1
        sub r11, 1
        jnz L_3072_mont_reduce_avx2_24_loop
        ; Final conditional subtraction, branch-free:
        ;   a[0..23] = a[24..47] - (m[0..23] masked by rbp)
        ; r9 advanced 24 * 8 = 192 bytes in the loop; removing the 96-byte bias
        ; leaves it at &a[24]. r8 keeps that as the source pointer, r9 is moved
        ; back to &a[0] as the destination.
        sub r9, 96
        ; rbp = 0 - rbp: a carry of 1 becomes an all-ones mask, 0 stays 0.
        neg rbp
        mov r8, r9
        sub r9, 192
        ; PEXT with an all-ones mask returns the source word and with a zero
        ; mask returns 0, so each PEXT below yields m[j] or 0. PEXT and MOV leave
        ; the flags alone, which keeps the SUB/SBB borrow chain intact.
        ; a[24..27] are still live in r14, r15, rdi, rsi; the rest come from [r8].
        mov rcx, QWORD PTR [r10]
        mov rdx, r14
        pext rcx, rcx, rbp
        sub rdx, rcx
        mov rcx, QWORD PTR [r10+8]
        mov rax, r15
        pext rcx, rcx, rbp
        mov QWORD PTR [r9], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+16]
        mov rcx, rdi
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+8], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+24]
        mov rdx, rsi
        pext rax, rax, rbp
        mov QWORD PTR [r9+16], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+32]
        mov rax, QWORD PTR [r8+32]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+24], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+40]
        mov rcx, QWORD PTR [r8+40]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+32], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+48]
        mov rdx, QWORD PTR [r8+48]
        pext rax, rax, rbp
        mov QWORD PTR [r9+40], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+56]
        mov rax, QWORD PTR [r8+56]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+48], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+64]
        mov rcx, QWORD PTR [r8+64]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+56], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+72]
        mov rdx, QWORD PTR [r8+72]
        pext rax, rax, rbp
        mov QWORD PTR [r9+64], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+80]
        mov rax, QWORD PTR [r8+80]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+72], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+88]
        mov rcx, QWORD PTR [r8+88]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+80], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+96]
        mov rdx, QWORD PTR [r8+96]
        pext rax, rax, rbp
        mov QWORD PTR [r9+88], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+104]
        mov rax, QWORD PTR [r8+104]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+96], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+112]
        mov rcx, QWORD PTR [r8+112]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+104], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+120]
        mov rdx, QWORD PTR [r8+120]
        pext rax, rax, rbp
        mov QWORD PTR [r9+112], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+128]
        mov rax, QWORD PTR [r8+128]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+120], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+136]
        mov rcx, QWORD PTR [r8+136]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+128], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+144]
        mov rdx, QWORD PTR [r8+144]
        pext rax, rax, rbp
        mov QWORD PTR [r9+136], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+152]
        mov rax, QWORD PTR [r8+152]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+144], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+160]
        mov rcx, QWORD PTR [r8+160]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+152], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+168]
        mov rdx, QWORD PTR [r8+168]
        pext rax, rax, rbp
        mov QWORD PTR [r9+160], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+176]
        mov rax, QWORD PTR [r8+176]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+168], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+184]
        mov rcx, QWORD PTR [r8+184]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+176], rax
        sbb rcx, rdx
        mov QWORD PTR [r9+184], rcx
        ; Restore saved registers in reverse push order.
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_3072_mont_reduce_avx2_24 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_3072_get_from_table_avx2_24 PROC
        sub rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov rax, 1
        movd xmm10, r8
        movd xmm11, rax
        vpxor ymm13, ymm13, ymm13
        vpermd ymm10, ymm13, ymm10
        vpermd ymm11, ymm13, ymm11
        ; START: 0-15
        vpxor ymm13, ymm13, ymm13
        vpxor ymm4, ymm4, ymm4
        vpxor ymm5, ymm5, ymm5
        vpxor ymm6, ymm6, ymm6
        vpxor ymm7, ymm7, ymm7
        ; ENTRY: 0
        mov r9, QWORD PTR [rdx]
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vmovdqu ymm2, YMMWORD PTR [r9+64]
        vmovdqu ymm3, YMMWORD PTR [r9+96]
        vpand ymm0, ymm0, ymm12
        vpand ymm1, ymm1, ymm12
        vpand ymm2, ymm2, ymm12
        vpand ymm3, ymm3, ymm12
        vpor ymm4, ymm4, ymm0
        vpor ymm5, ymm5, ymm1
        vpor ymm6, ymm6, ymm2
        vpor ymm7, ymm7, ymm3
        vpaddd ymm13, ymm13, ymm11
        ; ENTRY: 1
        mov r9, QWORD PTR [rdx+8]
        vpcmpeqd ymm12, ymm13, ymm10
        vmovdqu ymm0, YMMWORD PTR [r9]
        vmovdqu ymm1, YMMWORD PTR [r9+32]
        vmovdqu ymm2, YMMWORD PTR [r9+64]
        vmovdqu ymm3, YMMWORD PTR [r9+96]
        vpand ymm0, ymm0, ymm12
        vpand ymm1, ymm1, ymm12
        vpand ymm2, ymm2, ymm12
        vpand ymm3, ymm3, ymm12
        vpor ymm4, ymm4, ymm0
        vpor ymm5, ymm5, ymm1
vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, 
ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 
vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR 
[r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 20 mov r9, QWORD PTR [rdx+160] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR 
[r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 26 mov r9, QWORD PTR 
[rdx+208] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, 
ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 0-15 ; START: 16-23 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, 
ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR 
[r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 16 mov r9, QWORD PTR [rdx+128] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 17 mov r9, QWORD PTR [rdx+136] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 18 mov r9, QWORD PTR [rdx+144] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 19 mov r9, QWORD PTR [rdx+152] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 20 mov 
r9, QWORD PTR [rdx+160] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 21 mov r9, QWORD PTR [rdx+168] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 22 mov r9, QWORD PTR [rdx+176] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 23 mov r9, QWORD PTR [rdx+184] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 24 mov r9, QWORD PTR [rdx+192] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 25 mov r9, QWORD PTR [rdx+200] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 26 mov r9, QWORD PTR [rdx+208] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 27 mov r9, QWORD PTR [rdx+216] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, 
ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 28 mov r9, QWORD PTR [rdx+224] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 29 mov r9, QWORD PTR [rdx+232] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 30 mov r9, QWORD PTR [rdx+240] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 31 mov r9, QWORD PTR [rdx+248] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 ; END: 16-23 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_3072_get_from_table_avx2_24 ENDP _text ENDS ENDIF ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * ; * r A single precision number representing condition subtract result. ; * a A single precision number to subtract from. ; * b A single precision number to subtract. ; * m Mask value to apply. 
; */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_48 PROC
        ; Register arguments (Microsoft x64 convention):
        ;   rcx = r (result, 48 words), rdx = a, r8 = b, r9 = m.
        ; Returns rax = 0 when the 48-word subtraction produced no borrow,
        ; or -1 (all ones) when it did.
        ; Uses 384 bytes of stack as scratch; modifies r8, r10, r11, flags.
        sub     rsp, 384
        ; Pass 1: scratch[i] = b[i] AND m, two words per step.
sp_3072_cond_sub_48_off = 0
REPT 24
        mov     r10, QWORD PTR [r8+sp_3072_cond_sub_48_off]
        mov     r11, QWORD PTR [r8+sp_3072_cond_sub_48_off+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp_3072_cond_sub_48_off], r10
        mov     QWORD PTR [rsp+sp_3072_cond_sub_48_off+8], r11
sp_3072_cond_sub_48_off = sp_3072_cond_sub_48_off + 16
ENDM
        ; Pass 2: r[i] = a[i] - scratch[i] with the borrow carried in CF.
        ; b is no longer needed, so r8 becomes the scratch-word temporary.
        ; Only mov sits between the sub/sbb instructions, keeping CF intact.
        ; Each result is stored one step late, alternating r10 and r11.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
sp_3072_cond_sub_48_off = 8
REPT 23
        mov     r11, QWORD PTR [rdx+sp_3072_cond_sub_48_off]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_sub_48_off]
        sbb     r11, r8
        mov     QWORD PTR [rcx+sp_3072_cond_sub_48_off-8], r10
        mov     r10, QWORD PTR [rdx+sp_3072_cond_sub_48_off+8]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_sub_48_off+8]
        sbb     r10, r8
        mov     QWORD PTR [rcx+sp_3072_cond_sub_48_off], r11
sp_3072_cond_sub_48_off = sp_3072_cond_sub_48_off + 16
ENDM
        ; Final word (index 47) and the two stores still pending.
        mov     r11, QWORD PTR [rdx+376]
        mov     r8, QWORD PTR [rsp+376]
        sbb     r11, r8
        mov     QWORD PTR [rcx+368], r10
        mov     QWORD PTR [rcx+376], r11
        ; rax = 0 - CF: turns the final borrow into a 0 / -1 mask.
        sbb     rax, rax
        add     rsp, 384
        ret
sp_3072_cond_sub_48 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_48 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        mov r9, rdx
        xor rsi, rsi
        ; i = 48
        mov r10, 48
        mov r15, QWORD PTR [rcx]
        mov rdi, QWORD PTR [rcx+8]
L_3072_mont_reduce_48_loop:
        ; mu = a[i] * mp
        mov r13, r15
        imul r13, r8
        ; a[i+0] += m[0] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9]
        add r15, rax
        adc r12, rdx
        ; a[i+1] += m[1] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+8]
        mov r15, rdi
        add r15, rax
        adc r11, rdx
        add r15, r12
        adc r11, 0
        ; a[i+2] += m[2] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+16]
        mov rdi, QWORD PTR [rcx+16]
        add rdi, rax
        adc r12, rdx
        add rdi, r11
        adc r12, 0
        ; a[i+3] += m[3] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+24]
        mov r14, QWORD PTR [rcx+24]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+24], r14
        adc r11, 0
        ; a[i+4] += m[4] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+32]
        mov r14, QWORD PTR [rcx+32]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+32], r14
        adc r12, 0
        ; a[i+5] += m[5] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+40]
        mov r14, QWORD PTR [rcx+40]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+40], r14
        adc r11, 0
        ; a[i+6] += m[6] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+48]
        mov r14, QWORD PTR [rcx+48]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+48], r14
        adc r12, 0
        ; a[i+7] += m[7] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+56]
        mov r14, QWORD PTR [rcx+56]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+56], r14
        adc r11, 0
        ; a[i+8] += m[8] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+64]
        mov r14, QWORD PTR [rcx+64]
        add r14, rax
        adc r12, rdx
        add
r14, r11 mov QWORD PTR [rcx+64], r14 adc r12, 0 ; a[i+9] += m[9] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+72] mov r14, QWORD PTR [rcx+72] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+72], r14 adc r11, 0 ; a[i+10] += m[10] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+80] mov r14, QWORD PTR [rcx+80] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+80], r14 adc r12, 0 ; a[i+11] += m[11] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+88] mov r14, QWORD PTR [rcx+88] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+88], r14 adc r11, 0 ; a[i+12] += m[12] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+96] mov r14, QWORD PTR [rcx+96] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+96], r14 adc r12, 0 ; a[i+13] += m[13] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+104] mov r14, QWORD PTR [rcx+104] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+104], r14 adc r11, 0 ; a[i+14] += m[14] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+112] mov r14, QWORD PTR [rcx+112] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+112], r14 adc r12, 0 ; a[i+15] += m[15] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+120] mov r14, QWORD PTR [rcx+120] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+120], r14 adc r11, 0 ; a[i+16] += m[16] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+128] mov r14, QWORD PTR [rcx+128] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+128], r14 adc r12, 0 ; a[i+17] += m[17] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+136] mov r14, QWORD PTR [rcx+136] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+136], r14 adc r11, 0 ; a[i+18] += m[18] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+144] mov r14, QWORD PTR [rcx+144] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+144], r14 adc r12, 0 ; a[i+19] += m[19] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+152] mov r14, QWORD PTR [rcx+152] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR 
[rcx+152], r14 adc r11, 0 ; a[i+20] += m[20] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+160] mov r14, QWORD PTR [rcx+160] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+160], r14 adc r12, 0 ; a[i+21] += m[21] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+168] mov r14, QWORD PTR [rcx+168] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+168], r14 adc r11, 0 ; a[i+22] += m[22] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+176] mov r14, QWORD PTR [rcx+176] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+176], r14 adc r12, 0 ; a[i+23] += m[23] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+184] mov r14, QWORD PTR [rcx+184] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+184], r14 adc r11, 0 ; a[i+24] += m[24] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+192] mov r14, QWORD PTR [rcx+192] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+192], r14 adc r12, 0 ; a[i+25] += m[25] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+200] mov r14, QWORD PTR [rcx+200] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+200], r14 adc r11, 0 ; a[i+26] += m[26] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+208] mov r14, QWORD PTR [rcx+208] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+208], r14 adc r12, 0 ; a[i+27] += m[27] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+216] mov r14, QWORD PTR [rcx+216] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+216], r14 adc r11, 0 ; a[i+28] += m[28] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+224] mov r14, QWORD PTR [rcx+224] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+224], r14 adc r12, 0 ; a[i+29] += m[29] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+232] mov r14, QWORD PTR [rcx+232] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+232], r14 adc r11, 0 ; a[i+30] += m[30] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+240] mov r14, QWORD PTR [rcx+240] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR 
[rcx+240], r14 adc r12, 0 ; a[i+31] += m[31] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+248] mov r14, QWORD PTR [rcx+248] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+248], r14 adc r11, 0 ; a[i+32] += m[32] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+256] mov r14, QWORD PTR [rcx+256] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+256], r14 adc r12, 0 ; a[i+33] += m[33] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+264] mov r14, QWORD PTR [rcx+264] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+264], r14 adc r11, 0 ; a[i+34] += m[34] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+272] mov r14, QWORD PTR [rcx+272] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+272], r14 adc r12, 0 ; a[i+35] += m[35] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+280] mov r14, QWORD PTR [rcx+280] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+280], r14 adc r11, 0 ; a[i+36] += m[36] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+288] mov r14, QWORD PTR [rcx+288] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+288], r14 adc r12, 0 ; a[i+37] += m[37] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+296] mov r14, QWORD PTR [rcx+296] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+296], r14 adc r11, 0 ; a[i+38] += m[38] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+304] mov r14, QWORD PTR [rcx+304] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+304], r14 adc r12, 0 ; a[i+39] += m[39] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+312] mov r14, QWORD PTR [rcx+312] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+312], r14 adc r11, 0 ; a[i+40] += m[40] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+320] mov r14, QWORD PTR [rcx+320] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+320], r14 adc r12, 0 ; a[i+41] += m[41] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+328] mov r14, QWORD PTR [rcx+328] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR 
[rcx+328], r14 adc r11, 0 ; a[i+42] += m[42] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+336] mov r14, QWORD PTR [rcx+336] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+336], r14 adc r12, 0 ; a[i+43] += m[43] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+344] mov r14, QWORD PTR [rcx+344] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+344], r14 adc r11, 0 ; a[i+44] += m[44] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+352] mov r14, QWORD PTR [rcx+352] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+352], r14 adc r12, 0 ; a[i+45] += m[45] * mu mov rax, r13 xor r11, r11 mul QWORD PTR [r9+360] mov r14, QWORD PTR [rcx+360] add r14, rax adc r11, rdx add r14, r12 mov QWORD PTR [rcx+360], r14 adc r11, 0 ; a[i+46] += m[46] * mu mov rax, r13 xor r12, r12 mul QWORD PTR [r9+368] mov r14, QWORD PTR [rcx+368] add r14, rax adc r12, rdx add r14, r11 mov QWORD PTR [rcx+368], r14 adc r12, 0 ; a[i+47] += m[47] * mu mov rax, r13 mul QWORD PTR [r9+376] mov r14, QWORD PTR [rcx+376] add r12, rax adc rdx, rsi mov rsi, 0 adc rsi, 0 add r14, r12 mov QWORD PTR [rcx+376], r14 adc QWORD PTR [rcx+384], rdx adc rsi, 0 ; i -= 1 add rcx, 8 dec r10 jnz L_3072_mont_reduce_48_loop mov QWORD PTR [rcx], r15 mov QWORD PTR [rcx+8], rdi neg rsi IFDEF _WIN64 mov r8, r9 mov r9, rsi ELSE mov r9, rsi mov r8, r9 ENDIF mov rdx, rcx mov rcx, rcx sub rcx, 384 call sp_3072_cond_sub_48 pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_3072_mont_reduce_48 ENDP _text ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. 
; */
;
; Register use, as read from the code below:
;   rcx      = r  (result limbs are stored through this pointer)
;   rdx      = a  (minuend limbs are loaded through this pointer)
;   r8       = b  (subtrahend limbs are loaded through this pointer)
;   r9, r10  = alternating limb scratch
;   rax      = return value: 0 when no borrow leaves limb 47, -1 otherwise
_text SEGMENT READONLY PARA
sp_3072_sub_48 PROC
        ; Limb 0 opens the borrow chain with a plain SUB; every later limb
        ; uses SBB.  The MOVs in between do not touch the flags, so the
        ; borrow survives from one SBB to the next.
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        ; Limbs 1..46, two per pass.  On entry to each pass r9 holds the
        ; finished limb at byte offset L_3072_sub_48_off.  The next limb of
        ; a is always loaded before the previous result limb is stored.
L_3072_sub_48_off = 0
REPEAT 23
        mov     r10, QWORD PTR [rdx+L_3072_sub_48_off+8]
        mov     QWORD PTR [rcx+L_3072_sub_48_off], r9
        sbb     r10, QWORD PTR [r8+L_3072_sub_48_off+8]
        mov     r9, QWORD PTR [rdx+L_3072_sub_48_off+16]
        mov     QWORD PTR [rcx+L_3072_sub_48_off+8], r10
        sbb     r9, QWORD PTR [r8+L_3072_sub_48_off+16]
L_3072_sub_48_off = L_3072_sub_48_off + 16
ENDM
        ; Limb 47 closes the chain; r9 still holds limb 46.
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        sbb     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; rax = 0 - borrow
        sbb     rax, rax
        ret
sp_3072_sub_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * r  A single precision integer. (rcx; 49 words, offsets 0..384, are written)
;  * a  A single precision integer. (rdx on entry, copied to rax)
;  * b  A single precision digit.   (r8 on entry, copied to rdx for MULX)
;  *
;  * Scratch: r9/r10 receive the low/high halves of each product, r11/r12
;  * alternate as the running word.  r12 and r13 are saved and restored.
;  */
_text SEGMENT READONLY PARA
sp_3072_mul_d_avx2_48 PROC
        push    r12
        push    r13
        ; MULX takes one multiplicand implicitly from rdx, so a moves to rax.
        mov     rax, rdx
        mov     rdx, r8
        ; r13 = 0.  XOR also clears CF and OF, so the ADCX (CF) and ADOX (OF)
        ; chains both start from zero; later zeroing is done with MOV from
        ; r13 because MOV leaves the flags alone.
        xor     r13, r13
        ; A[0] * B
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[46] * B, two limbs per pass.  Each step adds the low
        ; half of the product into the running word, stores it, and starts
        ; the next running word from the high half.
L_3072_mul_d_avx2_48_off = 8
REPEAT 23
        mulx    r10, r9, QWORD PTR [rax+L_3072_mul_d_avx2_48_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+L_3072_mul_d_avx2_48_off], r12
        mulx    r10, r9, QWORD PTR [rax+L_3072_mul_d_avx2_48_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+L_3072_mul_d_avx2_48_off+8], r11
L_3072_mul_d_avx2_48_off = L_3072_mul_d_avx2_48_off + 16
ENDM
        ; A[47] * B
        mulx    r10, r9, QWORD PTR [rax+376]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; Fold the last CF into the top word (r13 is zero).
        adcx    r11, r13
        mov     QWORD PTR [rcx+376], r12
        mov     QWORD PTR [rcx+384], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_48 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number
;  * (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * d1   The high order half of the number to divide. (rcx)
;  * d0   The low order half of the number to divide. (rdx)
;  * div  The divisor. (r8)
;  * returns the result of the division (the quotient, in rax).
;  *
;  * DIV takes the dividend in rdx:rax and raises a divide error when the
;  * quotient does not fit in 64 bits, so this is only usable when d1 < div.
;  */
_text SEGMENT READONLY PARA
div_3072_word_asm_48 PROC
        ; d0 goes to rax first because rdx is about to be overwritten by d1.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_3072_word_asm_48 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not subtracting.
;  *
;  * r  A single precision number representing condition subtract result. (rcx)
;  * a  A single precision number to subtract from. (rdx)
;  * b  A single precision number to subtract. (r8)
;  * m  Mask value to apply. (r9)
;  *
;  * Returns rax = 0 when no borrow leaves limb 47, -1 otherwise.
;  * r10, r11 and r12 rotate as limb scratch; r12 is saved and restored.
;  */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_48 PROC
        push    r12
        ; Each limb of b is passed through PEXT with the mask in r9: an
        ; all-ones mask returns the limb unchanged and a zero mask returns 0,
        ; so the same instructions run whether or not b is subtracted.
        ; Limb 0 opens the borrow chain with SUB; MOV and PEXT do not modify
        ; CF, so the borrow carries from one SBB to the next.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Limbs 1..45, three per pass (the scratch registers rotate with
        ; period three).  The store in each step writes the previous limb.
L_3072_cond_sub_avx2_48_off = 8
REPEAT 15
        mov     r12, QWORD PTR [r8+L_3072_cond_sub_avx2_48_off]
        mov     r11, QWORD PTR [rdx+L_3072_cond_sub_avx2_48_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+L_3072_cond_sub_avx2_48_off-8], r10
        sbb     r11, r12

        mov     r10, QWORD PTR [r8+L_3072_cond_sub_avx2_48_off+8]
        mov     r12, QWORD PTR [rdx+L_3072_cond_sub_avx2_48_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+L_3072_cond_sub_avx2_48_off], r11
        sbb     r12, r10

        mov     r11, QWORD PTR [r8+L_3072_cond_sub_avx2_48_off+16]
        mov     r10, QWORD PTR [rdx+L_3072_cond_sub_avx2_48_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+L_3072_cond_sub_avx2_48_off+8], r12
        sbb     r10, r11
L_3072_cond_sub_avx2_48_off = L_3072_cond_sub_avx2_48_off + 24
ENDM
        ; Limb 46
        mov     r12, QWORD PTR [r8+368]
        mov     r11, QWORD PTR [rdx+368]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+360], r10
        sbb     r11, r12
        ; Limb 47
        mov     r10, QWORD PTR [r8+376]
        mov     r12, QWORD PTR [rdx+376]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+368], r11
        sbb     r12, r10
        mov     QWORD PTR [rcx+376], r12
        ; rax = 0 - borrow
        sbb     rax, rax
        pop     r12
        ret
sp_3072_cond_sub_avx2_48 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
;  *
;  * a  A single precision integer. (rcx)
;  * b  A single precision integer. (rdx)
;  * return -ve, 0 or +ve if a is less than, equal to or greater than b
;  * respectively.
;  *
;  * All 48 limbs are always read, most significant first, and the result is
;  * chosen with conditional moves only; there are no branches.
;  * r12 is saved and restored.
;  */
_text SEGMENT READONLY PARA
sp_3072_cmp_48 PROC
        push    r12
        xor     r9, r9                  ; r9  = 0, used to clear the mask
        mov     r8, -1                  ; r8  = mask: all ones until limbs differ
        mov     rax, -1                 ; rax = result so far
        mov     r10, 1                  ; r10 = the "greater than" result
        ; Limbs 47 down to 0.  Both limbs are ANDed with the mask, so once a
        ; difference has been seen every later pair compares as 0 == 0 and
        ; none of the conditional moves fire again.
L_3072_cmp_48_off = 376
REPEAT 48
        mov     r11, QWORD PTR [rcx+L_3072_cmp_48_off]
        mov     r12, QWORD PTR [rdx+L_3072_cmp_48_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10                ; a limb above b limb: result = 1
        cmovc   rax, r8                 ; a limb below b limb: result = mask (-1)
        cmovnz  r8, r9                  ; limbs differ: clear the mask
L_3072_cmp_48_off = L_3072_cmp_48_off - 8
ENDM
        ; Mask still all ones means every limb matched: -1 xor -1 = 0.
        ; Otherwise the mask is 0 and rax keeps its 1 or -1.
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_48 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT _text SEGMENT READONLY PARA sp_3072_get_from_table_48 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 vmovdqu OWORD PTR [rsp+16], xmm7 vmovdqu OWORD PTR [rsp+32], xmm8 vmovdqu OWORD PTR [rsp+48], xmm9 vmovdqu OWORD PTR [rsp+64], xmm10 vmovdqu OWORD PTR [rsp+80], xmm11 vmovdqu OWORD PTR [rsp+96], xmm12 vmovdqu OWORD PTR [rsp+112], xmm13 mov rax, 1 movd xmm10, r8 movd xmm11, rax pxor xmm13, xmm13 pshufd xmm11, xmm11, 0 pshufd xmm10, xmm10, 0 ; START: 0-7 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] movdqu xmm12, xmm13 pcmpeqd xmm12, 
xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd 
xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 0-7 ; START: 8-15 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 
pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR 
[r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 8-15 ; START: 16-23 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, 
xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] 
movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 16-23 ; START: 24-31 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, 
xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] 
movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 24-31 ; START: 32-39 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, 
xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] 
movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 32-39 ; START: 40-47 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, 
xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] 
movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, 
xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 ; END: 40-47 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_3072_get_from_table_48 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number 
back to 3072 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; */
;
; Arguments arrive in the Microsoft x64 registers: rcx = a, rdx = m, r8 = mp.
; a is read as 97 limbs (a[0..96]); the result is written to a[0..47].
; Uses mulx/pext (BMI2) and adcx/adox (ADX).
;
; Register roles:
;   r9    a; biased by +192 bytes during the loop (limb a[i+k] is at
;         [r9 + 8*k - 192]) and advanced by 8 bytes per pass
;   r10   m
;   r8    mp during the loop, then pointer to a[48] for the subtraction
;   r11   pass counter, 48 down to 0
;   r14, r15, rdi, rsi   register copies of a[i+0] .. a[i+3]
;   r12, r13             alternating accumulators for a[i+4] .. a[i+48]
;   rax, rcx             low / high half of each mulx product
;   rbx   zero during each pass
;   rbp   carry handed from one pass to the next, then the subtract mask
;   rdx   mu (implicit mulx multiplicand)
; Every callee-saved register listed above is pushed on entry and popped
; before returning.
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_48 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        push    rbp
        mov     r9, rcx                 ; r9 = a
        mov     r10, rdx                ; r10 = m; rdx is reused for mu below
        xor     rbp, rbp                ; no carry into the first pass
        ; i = 48
        mov     r11, 48
        ; Keep the four lowest live limbs in registers.
        mov     r14, QWORD PTR [r9]
        mov     r15, QWORD PTR [r9+8]
        mov     rdi, QWORD PTR [r9+16]
        mov     rsi, QWORD PTR [r9+24]
        ; Bias the pointer: limb offsets below run from -160 to +192.
        add     r9, 192
        xor     rbp, rbp                ; rbp is already zero here
L_3072_mont_reduce_avx2_48_loop:
        ; mu = a[i] * mp
        mov     rdx, r14
        mov     r12, r14
        imul    rdx, r8
        ; rbx = 0; xor also clears CF and OF, which start the two
        ; independent carry chains: adcx carries the low product halves
        ; through CF, adox carries the high product halves through OF.
        xor     rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx    rcx, rax, QWORD PTR [r10]
        mov     r14, r15                ; a[i+1] becomes next pass's a[i+0]
        adcx    r12, rax                ; sum is not stored, only CF is used
        adox    r14, rcx
        ; a[i+1] += m[1] * mu
        mulx    rcx, rax, QWORD PTR [r10+8]
        mov     r15, rdi
        adcx    r14, rax
        adox    r15, rcx
        ; a[i+2] += m[2] * mu
        mulx    rcx, rax, QWORD PTR [r10+16]
        mov     rdi, rsi
        adcx    r15, rax
        adox    rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx    rcx, rax, QWORD PTR [r10+24]
        mov     rsi, QWORD PTR [r9+-160]
        adcx    rdi, rax
        adox    rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx    rcx, rax, QWORD PTR [r10+32]
        mov     r13, QWORD PTR [r9+-152]
        adcx    rsi, rax
        adox    r13, rcx
        ; From here on each step loads limb k+1 into one accumulator,
        ; completes limb k in the other and writes limb k back to memory.
        ; a[i+5] += m[5] * mu
        mulx    rcx, rax, QWORD PTR [r10+40]
        mov     r12, QWORD PTR [r9+-144]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-152], r13
        ; a[i+6] += m[6] * mu
        mulx    rcx, rax, QWORD PTR [r10+48]
        mov     r13, QWORD PTR [r9+-136]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-144], r12
        ; a[i+7] += m[7] * mu
        mulx    rcx, rax, QWORD PTR [r10+56]
        mov     r12, QWORD PTR [r9+-128]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-136], r13
        ; a[i+8] += m[8] * mu
        mulx    rcx, rax, QWORD PTR [r10+64]
        mov     r13, QWORD PTR [r9+-120]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-128], r12
        ; a[i+9] += m[9] * mu
        mulx    rcx, rax, QWORD PTR [r10+72]
        mov     r12, QWORD PTR [r9+-112]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-120], r13
        ; a[i+10] += m[10] * mu
        mulx    rcx, rax, QWORD PTR [r10+80]
        mov     r13, QWORD PTR [r9+-104]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-112], r12
        ; a[i+11] += m[11] * mu
        mulx    rcx, rax, QWORD PTR [r10+88]
        mov     r12, QWORD PTR [r9+-96]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-104], r13
        ; a[i+12] += m[12] * mu
        mulx    rcx, rax, QWORD PTR [r10+96]
        mov     r13, QWORD PTR [r9+-88]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-96], r12
        ; a[i+13] += m[13] * mu
        mulx    rcx, rax, QWORD PTR [r10+104]
        mov     r12, QWORD PTR [r9+-80]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-88], r13
        ; a[i+14] += m[14] * mu
        mulx    rcx, rax, QWORD PTR [r10+112]
        mov     r13, QWORD PTR [r9+-72]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-80], r12
        ; a[i+15] += m[15] * mu
        mulx    rcx, rax, QWORD PTR [r10+120]
        mov     r12, QWORD PTR [r9+-64]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-72], r13
        ; a[i+16] += m[16] * mu
        mulx    rcx, rax, QWORD PTR [r10+128]
        mov     r13, QWORD PTR [r9+-56]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-64], r12
        ; a[i+17] += m[17] * mu
        mulx    rcx, rax, QWORD PTR [r10+136]
        mov     r12, QWORD PTR [r9+-48]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-56], r13
        ; a[i+18] += m[18] * mu
        mulx    rcx, rax, QWORD PTR [r10+144]
        mov     r13, QWORD PTR [r9+-40]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-48], r12
        ; a[i+19] += m[19] * mu
        mulx    rcx, rax, QWORD PTR [r10+152]
        mov     r12, QWORD PTR [r9+-32]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-40], r13
        ; a[i+20] += m[20] * mu
        mulx    rcx, rax, QWORD PTR [r10+160]
        mov     r13, QWORD PTR [r9+-24]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-32], r12
        ; a[i+21] += m[21] * mu
        mulx    rcx, rax, QWORD PTR [r10+168]
        mov     r12, QWORD PTR [r9+-16]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-24], r13
        ; a[i+22] += m[22] * mu
        mulx    rcx, rax, QWORD PTR [r10+176]
        mov     r13, QWORD PTR [r9+-8]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+-16], r12
        ; a[i+23] += m[23] * mu
        mulx    rcx, rax, QWORD PTR [r10+184]
        mov     r12, QWORD PTR [r9]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+-8], r13
        ; a[i+24] += m[24] * mu
        mulx    rcx, rax, QWORD PTR [r10+192]
        mov     r13, QWORD PTR [r9+8]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9], r12
        ; a[i+25] += m[25] * mu
        mulx    rcx, rax, QWORD PTR [r10+200]
        mov     r12, QWORD PTR [r9+16]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+8], r13
        ; a[i+26] += m[26] * mu
        mulx    rcx, rax, QWORD PTR [r10+208]
        mov     r13, QWORD PTR [r9+24]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+16], r12
        ; a[i+27] += m[27] * mu
        mulx    rcx, rax, QWORD PTR [r10+216]
        mov     r12, QWORD PTR [r9+32]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+24], r13
        ; a[i+28] += m[28] * mu
        mulx    rcx, rax, QWORD PTR [r10+224]
        mov     r13, QWORD PTR [r9+40]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+32], r12
        ; a[i+29] += m[29] * mu
        mulx    rcx, rax, QWORD PTR [r10+232]
        mov     r12, QWORD PTR [r9+48]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+40], r13
        ; a[i+30] += m[30] * mu
        mulx    rcx, rax, QWORD PTR [r10+240]
        mov     r13, QWORD PTR [r9+56]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+48], r12
        ; a[i+31] += m[31] * mu
        mulx    rcx, rax, QWORD PTR [r10+248]
        mov     r12, QWORD PTR [r9+64]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+56], r13
        ; a[i+32] += m[32] * mu
        mulx    rcx, rax, QWORD PTR [r10+256]
        mov     r13, QWORD PTR [r9+72]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+64], r12
        ; a[i+33] += m[33] * mu
        mulx    rcx, rax, QWORD PTR [r10+264]
        mov     r12, QWORD PTR [r9+80]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+72], r13
        ; a[i+34] += m[34] * mu
        mulx    rcx, rax, QWORD PTR [r10+272]
        mov     r13, QWORD PTR [r9+88]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+80], r12
        ; a[i+35] += m[35] * mu
        mulx    rcx, rax, QWORD PTR [r10+280]
        mov     r12, QWORD PTR [r9+96]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+88], r13
        ; a[i+36] += m[36] * mu
        mulx    rcx, rax, QWORD PTR [r10+288]
        mov     r13, QWORD PTR [r9+104]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+96], r12
        ; a[i+37] += m[37] * mu
        mulx    rcx, rax, QWORD PTR [r10+296]
        mov     r12, QWORD PTR [r9+112]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+104], r13
        ; a[i+38] += m[38] * mu
        mulx    rcx, rax, QWORD PTR [r10+304]
        mov     r13, QWORD PTR [r9+120]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+112], r12
        ; a[i+39] += m[39] * mu
        mulx    rcx, rax, QWORD PTR [r10+312]
        mov     r12, QWORD PTR [r9+128]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+120], r13
        ; a[i+40] += m[40] * mu
        mulx    rcx, rax, QWORD PTR [r10+320]
        mov     r13, QWORD PTR [r9+136]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+128], r12
        ; a[i+41] += m[41] * mu
        mulx    rcx, rax, QWORD PTR [r10+328]
        mov     r12, QWORD PTR [r9+144]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+136], r13
        ; a[i+42] += m[42] * mu
        mulx    rcx, rax, QWORD PTR [r10+336]
        mov     r13, QWORD PTR [r9+152]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+144], r12
        ; a[i+43] += m[43] * mu
        mulx    rcx, rax, QWORD PTR [r10+344]
        mov     r12, QWORD PTR [r9+160]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+152], r13
        ; a[i+44] += m[44] * mu
        mulx    rcx, rax, QWORD PTR [r10+352]
        mov     r13, QWORD PTR [r9+168]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+160], r12
        ; a[i+45] += m[45] * mu
        mulx    rcx, rax, QWORD PTR [r10+360]
        mov     r12, QWORD PTR [r9+176]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+168], r13
        ; a[i+46] += m[46] * mu
        mulx    rcx, rax, QWORD PTR [r10+368]
        mov     r13, QWORD PTR [r9+184]
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [r9+176], r12
        ; a[i+47] += m[47] * mu
        mulx    rcx, rax, QWORD PTR [r10+376]
        mov     r12, QWORD PTR [r9+192]
        adcx    r13, rax
        adox    r12, rcx
        mov     QWORD PTR [r9+184], r13
        ; a[i+48] += carry from the previous pass (rbp) plus CF, then
        ; collect this pass's outgoing OF and CF into rbp for the next pass.
        adcx    r12, rbp
        mov     rbp, rbx                ; rbp = 0; mov leaves CF/OF intact
        mov     QWORD PTR [r9+192], r12
        adox    rbp, rbx                ; rbp += OF
        adcx    rbp, rbx                ; rbp += CF
        ; a += 1
        add     r9, 8
        ; i -= 1
        sub     r11, 1
        jnz     L_3072_mont_reduce_avx2_48_loop
        ; Final step: a[0..47] = a[48..95] - (m AND mask), where
        ; mask = -rbp. pext with an all-ones mask returns m[k] unchanged and
        ; with a zero mask returns 0, so the subtraction is applied without a
        ; branch. mov and pext leave CF alone, which lets the sub/sbb borrow
        ; chain run through the interleaved loads and stores.
        sub     r9, 192                 ; undo the bias: r9 = &a[48]
        neg     rbp                     ; carry (0 or 1) -> mask (0 or -1)
        mov     r8, r9                  ; r8 = &a[48], source of the high half
        sub     r9, 384                 ; r9 = &a[0], destination
        ; Limbs a[48..51] are still held in r14, r15, rdi, rsi.
        mov     rcx, QWORD PTR [r10]
        mov     rdx, r14
        pext    rcx, rcx, rbp
        sub     rdx, rcx
        mov     rcx, QWORD PTR [r10+8]
        mov     rax, r15
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+16]
        mov     rcx, rdi
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+8], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+24]
        mov     rdx, rsi
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+16], rcx
        sbb     rdx, rax
        ; Remaining limbs come from memory at [r8+8*k]; rax, rcx and rdx
        ; rotate roles so that each result is stored one step after it is
        ; computed.
        mov     rcx, QWORD PTR [r10+32]
        mov     rax, QWORD PTR [r8+32]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+24], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+40]
        mov     rcx, QWORD PTR [r8+40]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+32], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+48]
        mov     rdx, QWORD PTR [r8+48]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+40], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+56]
        mov     rax, QWORD PTR [r8+56]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+48], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+64]
        mov     rcx, QWORD PTR [r8+64]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+56], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+72]
        mov     rdx, QWORD PTR [r8+72]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+64], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+80]
        mov     rax, QWORD PTR [r8+80]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+72], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+88]
        mov     rcx, QWORD PTR [r8+88]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+80], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+96]
        mov     rdx, QWORD PTR [r8+96]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+88], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+104]
        mov     rax, QWORD PTR [r8+104]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+96], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+112]
        mov     rcx, QWORD PTR [r8+112]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+104], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+120]
        mov     rdx, QWORD PTR [r8+120]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+112], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+128]
        mov     rax, QWORD PTR [r8+128]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+120], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+136]
        mov     rcx, QWORD PTR [r8+136]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+128], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+144]
        mov     rdx, QWORD PTR [r8+144]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+136], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+152]
        mov     rax, QWORD PTR [r8+152]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+144], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+160]
        mov     rcx, QWORD PTR [r8+160]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+152], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+168]
        mov     rdx, QWORD PTR [r8+168]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+160], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+176]
        mov     rax, QWORD PTR [r8+176]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+168], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+184]
        mov     rcx, QWORD PTR [r8+184]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+176], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+192]
        mov     rdx, QWORD PTR [r8+192]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+184], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+200]
        mov     rax, QWORD PTR [r8+200]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+192], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+208]
        mov     rcx, QWORD PTR [r8+208]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+200], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+216]
        mov     rdx, QWORD PTR [r8+216]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+208], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+224]
        mov     rax, QWORD PTR [r8+224]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+216], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+232]
        mov     rcx, QWORD PTR [r8+232]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+224], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+240]
        mov     rdx, QWORD PTR [r8+240]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+232], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+248]
        mov     rax, QWORD PTR [r8+248]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+240], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+256]
        mov     rcx, QWORD PTR [r8+256]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+248], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+264]
        mov     rdx, QWORD PTR [r8+264]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+256], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+272]
        mov     rax, QWORD PTR [r8+272]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+264], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+280]
        mov     rcx, QWORD PTR [r8+280]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+272], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+288]
        mov     rdx, QWORD PTR [r8+288]
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+280], rcx
        sbb     rdx, rax
        mov     rcx, QWORD PTR [r10+296]
        mov     rax, QWORD PTR [r8+296]
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9+288], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+304]
        mov     rcx, QWORD PTR [r8+304]
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+296], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+312]
        mov     rdx, QWORD PTR [r8+312]
        pext    rax, rax, rbp
mov QWORD PTR [r9+304], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+320] mov rax, QWORD PTR [r8+320] pext rcx, rcx, rbp mov QWORD PTR [r9+312], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+328] mov rcx, QWORD PTR [r8+328] pext rdx, rdx, rbp mov QWORD PTR [r9+320], rax sbb rcx, rdx mov rax, QWORD PTR [r10+336] mov rdx, QWORD PTR [r8+336] pext rax, rax, rbp mov QWORD PTR [r9+328], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+344] mov rax, QWORD PTR [r8+344] pext rcx, rcx, rbp mov QWORD PTR [r9+336], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+352] mov rcx, QWORD PTR [r8+352] pext rdx, rdx, rbp mov QWORD PTR [r9+344], rax sbb rcx, rdx mov rax, QWORD PTR [r10+360] mov rdx, QWORD PTR [r8+360] pext rax, rax, rbp mov QWORD PTR [r9+352], rcx sbb rdx, rax mov rcx, QWORD PTR [r10+368] mov rax, QWORD PTR [r8+368] pext rcx, rcx, rbp mov QWORD PTR [r9+360], rdx sbb rax, rcx mov rdx, QWORD PTR [r10+376] mov rcx, QWORD PTR [r8+376] pext rdx, rdx, rbp mov QWORD PTR [r9+368], rax sbb rcx, rdx mov QWORD PTR [r9+376], rcx pop rbp pop rbx pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_3072_mont_reduce_avx2_48 ENDP _text ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT _text SEGMENT READONLY PARA sp_3072_get_from_table_avx2_48 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 vmovdqu OWORD PTR [rsp+16], xmm7 vmovdqu OWORD PTR [rsp+32], xmm8 vmovdqu OWORD PTR [rsp+48], xmm9 vmovdqu OWORD PTR [rsp+64], xmm10 vmovdqu OWORD PTR [rsp+80], xmm11 vmovdqu OWORD PTR [rsp+96], xmm12 vmovdqu OWORD PTR [rsp+112], xmm13 mov rax, 1 movd xmm10, r8 movd xmm11, rax vpxor ymm13, ymm13, ymm13 vpermd ymm10, ymm13, ymm10 vpermd ymm11, ymm13, ymm11 ; START: 0-15 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand 
ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] 
vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu 
ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu 
ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 0-15 ; START: 16-31 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD 
PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd 
ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 
vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 16-31 ; START: 32-47 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand 
ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR 
[r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 256 
vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 
vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 ; END: 32-47 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_3072_get_from_table_avx2_48 ENDP _text ENDS ENDIF ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. ; * ; * r A single precision number representing conditional add result. ; * a A single precision number to add with. ; * b A single precision number to add. ; * m Mask value to apply. 
;  */
; /* Implementation notes for sp_3072_cond_add_24:
;  *   rcx = r, rdx = a, r8 = b, r9 = m. rax returns the carry out of the
;  *   top word (0 or 1).
;  *   Pass 1 writes the 24 words of (b AND m) to a 192-byte stack scratch
;  *   area. Pass 2 adds that scratch copy to a in one ADD/ADC chain.
;  *   AND rewrites the carry flag, so it cannot sit inside the ADC chain.
;  *   r8 is reused as a temporary in pass 2; b is no longer needed by then.
;  */
_text SEGMENT READONLY PARA
sp_3072_cond_add_24 PROC
        sub     rsp, 192
        mov     rax, 0
        ; Pass 1: scratch[i] = b[i] AND m, two words per expansion.
sp_3072_cond_add_24_ofs = 0
        REPEAT 12
        mov     r10, QWORD PTR [r8+sp_3072_cond_add_24_ofs]
        mov     r11, QWORD PTR [r8+sp_3072_cond_add_24_ofs+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp_3072_cond_add_24_ofs], r10
        mov     QWORD PTR [rsp+sp_3072_cond_add_24_ofs+8], r11
sp_3072_cond_add_24_ofs = sp_3072_cond_add_24_ofs + 16
        ENDM
        ; Pass 2: r = a + scratch. Word 0 starts the carry chain.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        add     r10, r8
        ; Words 1..22. Each sum is stored one step late, after the next
        ; word of a has been loaded. MOV leaves the carry flag untouched.
sp_3072_cond_add_24_ofs = 8
        REPEAT 11
        mov     r11, QWORD PTR [rdx+sp_3072_cond_add_24_ofs]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_add_24_ofs]
        adc     r11, r8
        mov     QWORD PTR [rcx+sp_3072_cond_add_24_ofs-8], r10
        mov     r10, QWORD PTR [rdx+sp_3072_cond_add_24_ofs+8]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_add_24_ofs+8]
        adc     r10, r8
        mov     QWORD PTR [rcx+sp_3072_cond_add_24_ofs], r11
sp_3072_cond_add_24_ofs = sp_3072_cond_add_24_ofs + 16
        ENDM
        ; Word 23, then flush the last two sums.
        mov     r11, QWORD PTR [rdx+184]
        mov     r8, QWORD PTR [rsp+184]
        adc     r11, r8
        mov     QWORD PTR [rcx+176], r10
        mov     QWORD PTR [rcx+184], r11
        ; rax was zeroed on entry, so this leaves the final carry in rax.
        adc     rax, 0
        add     rsp, 192
        ret
sp_3072_cond_add_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
;  * m is -1 to add and 0 when not.
;  *
;  * r  A single precision number representing conditional add result.
;  * a  A single precision number to add with.
;  * b  A single precision number to add.
;  * m  Mask value to apply.
;  *
;  * rcx = r, rdx = a, r8 = b, r9 = m. rax returns the carry (0 or 1).
;  * PEXT applies the mask: an all-ones mask returns the source word
;  * unchanged and a zero mask returns 0. PEXT does not modify the carry
;  * flag, so masking is interleaved with the ADC chain and no stack
;  * scratch is needed.
;  */
; One word of the chain. bw receives b[i] and is masked, aw receives a[i]
; and becomes the new sum, prev holds the previous sum, which is stored to
; r[i-1]. ofs is the byte offset of word i.
SP_3072_COND_ADD_AVX2_24_STEP MACRO bw, aw, prev, ofs
        mov     bw, QWORD PTR [r8+ofs]
        mov     aw, QWORD PTR [rdx+ofs]
        pext    bw, bw, r9
        mov     QWORD PTR [rcx+ofs-8], prev
        adc     aw, bw
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_add_avx2_24 PROC
        push    r12
        mov     rax, 0
        ; Word 0 starts the carry chain.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        add     r10, r12
        ; Words 1..21: r10, r11 and r12 rotate roles with period three.
sp_3072_cond_add_avx2_24_ofs = 8
        REPEAT 7
        SP_3072_COND_ADD_AVX2_24_STEP r12, r11, r10, sp_3072_cond_add_avx2_24_ofs
        SP_3072_COND_ADD_AVX2_24_STEP r10, r12, r11, sp_3072_cond_add_avx2_24_ofs+8
        SP_3072_COND_ADD_AVX2_24_STEP r11, r10, r12, sp_3072_cond_add_avx2_24_ofs+16
sp_3072_cond_add_avx2_24_ofs = sp_3072_cond_add_avx2_24_ofs + 24
        ENDM
        ; Words 22 and 23, then flush the final sum.
        SP_3072_COND_ADD_AVX2_24_STEP r12, r11, r10, 176
        SP_3072_COND_ADD_AVX2_24_STEP r10, r12, r11, 184
        mov     QWORD PTR [rcx+184], r12
        adc     rax, 0
        pop     r12
        ret
sp_3072_cond_add_avx2_24 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bit. (r = a << n)
;  *
;  * r  Result of left shift by n.
;  * a  Number to shift.
;  * n  Amount to shift.
;  */
; /* Implementation notes for sp_3072_lshift_48:
;  *   rcx = r on entry and is copied to rax so that cl can hold the count.
;  *   rdx = a, r8b = n. rax still holds r on return.
;  *   The 48 input words are processed from the top down. 49 words are
;  *   written: r[48] receives the bits shifted out of a[47].
;  *   Every load of a group happens before the stores of that group, and
;  *   stores only touch offsets at or above the loads already made.
;  *   SHLD dst, src, cl shifts dst left by cl and fills the vacated low
;  *   bits from the top of src.
;  */
; Shift one group of four words. On entry "upper" already holds the word
; just above the group (a[ofs/8 + 4]). "below" is loaded with a[ofs/8] and
; is left holding it for the next group. r[ofs/8 + 1 .. ofs/8 + 4] are
; written.
SP_3072_LSHIFT_48_STEP4 MACRO upper, below, ofs
        mov     below, QWORD PTR [rdx+ofs]
        mov     r8, QWORD PTR [rdx+ofs+8]
        mov     r9, QWORD PTR [rdx+ofs+16]
        mov     r10, QWORD PTR [rdx+ofs+24]
        shld    upper, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, below, cl
        mov     QWORD PTR [rax+ofs+8], r8
        mov     QWORD PTR [rax+ofs+16], r9
        mov     QWORD PTR [rax+ofs+24], r10
        mov     QWORD PTR [rax+ofs+32], upper
ENDM
_text SEGMENT READONLY PARA
sp_3072_lshift_48 PROC
        push    r12
        push    r13
        mov     rax, rcx
        mov     cl, r8b
        ; r12 starts at zero and collects the overflow word r[48].
        mov     r12, 0
        ; Top group: a[43..47] produce r[44..48].
        mov     r13, QWORD PTR [rdx+344]
        mov     r8, QWORD PTR [rdx+352]
        mov     r9, QWORD PTR [rdx+360]
        mov     r10, QWORD PTR [rdx+368]
        mov     r11, QWORD PTR [rdx+376]
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+352], r8
        mov     QWORD PTR [rax+360], r9
        mov     QWORD PTR [rax+368], r10
        mov     QWORD PTR [rax+376], r11
        mov     QWORD PTR [rax+384], r12
        ; Ten groups of four words, producing r[4..43]. r13 and r11 swap
        ; the "upper"/"below" roles from one group to the next.
sp_3072_lshift_48_ofs = 312
        REPEAT 5
        SP_3072_LSHIFT_48_STEP4 r13, r11, sp_3072_lshift_48_ofs
        SP_3072_LSHIFT_48_STEP4 r11, r13, sp_3072_lshift_48_ofs-32
sp_3072_lshift_48_ofs = sp_3072_lshift_48_ofs - 64
        ENDM
        ; Bottom group: r13 holds a[3]. a[0..2] produce r[0..3], and the
        ; lowest word takes a plain SHL because nothing lies below it.
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shl     r8, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r13
        pop     r13
        pop     r12
        ret
sp_3072_lshift_48 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_4096
IFDEF WOLFSSL_SP_4096
; /* Read big endian unsigned byte array into r.
;  * Uses the bswap instruction.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  */
; /* Implementation notes for sp_4096_from_bin_bswap:
;  *   rcx = r (write cursor), r8 = a, r9 = n (bytes still to convert).
;  *   rdx (size) is never read; the output length is fixed at 512 bytes.
;  *   r11 = a + n walks backwards from the end of the input, where the
;  *   least significant bytes are. r12 = r + 512 marks the end of the
;  *   output. r13 is kept at zero.
;  *   Stages: 64-byte blocks, then 8-byte words, then the 1..7 leading
;  *   bytes of a assembled into one word, then zero fill up to r12.
;  */
_text SEGMENT READONLY PARA
sp_4096_from_bin_bswap PROC
        push    r12
        push    r13
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9
        add     r12, 512
        xor     r13, r13
        jmp     L_4096_from_bin_bswap_64_end
L_4096_from_bin_bswap_64_start:
        ; Convert 64 input bytes into 8 output words, last input word first.
        sub     r11, 64
sp_4096_from_bin_bswap_ofs = 0
        REPEAT 4
        mov     rax, QWORD PTR [r11+56-sp_4096_from_bin_bswap_ofs]
        mov     r10, QWORD PTR [r11+48-sp_4096_from_bin_bswap_ofs]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+sp_4096_from_bin_bswap_ofs], rax
        mov     QWORD PTR [rcx+sp_4096_from_bin_bswap_ofs+8], r10
sp_4096_from_bin_bswap_ofs = sp_4096_from_bin_bswap_ofs + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_bswap_64_end:
        ; Signed test: loop while at least 64 bytes remain.
        cmp     r9, 63
        jg      L_4096_from_bin_bswap_64_start
        jmp     L_4096_from_bin_bswap_8_end
L_4096_from_bin_bswap_8_start:
        ; Convert one 8-byte word.
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_bswap_8_start
        ; Skip the partial word when no bytes remain.
        cmp     r9, r13
        je      L_4096_from_bin_bswap_hi_end
        ; The remaining bytes are the most significant ones at the start
        ; of a. Accumulate them big-endian into r10.
        mov     r10, r13
        mov     rax, r13
L_4096_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_bswap_hi_end:
        ; Zero any output words not yet written.
        cmp     rcx, r12
        jge     L_4096_from_bin_bswap_zero_end
L_4096_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
        cmp     rcx, r12
        jl      L_4096_from_bin_bswap_zero_start
L_4096_from_bin_bswap_zero_end:
        pop     r13
        pop     r12
        ret
sp_4096_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  */
; /* Implementation notes for sp_4096_from_bin_movbe:
;  *   rcx = r (write cursor), r8 = a, r9 = n (bytes still to convert).
;  *   rdx (size) is never read; the output length is fixed at 512 bytes.
;  *   r11 = a + n walks backwards from the end of the input. r12 = r + 512
;  *   marks the end of the output.
;  *   MOVBE loads each word already byte-reversed, so no separate swap
;  *   instruction is needed.
;  */
_text SEGMENT READONLY PARA
sp_4096_from_bin_movbe PROC
        push    r12
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9
        add     r12, 512
        jmp     L_4096_from_bin_movbe_64_end
L_4096_from_bin_movbe_64_start:
        ; Convert 64 input bytes into 8 output words, last input word first.
        sub     r11, 64
sp_4096_from_bin_movbe_ofs = 0
        REPEAT 4
        movbe   rax, QWORD PTR [r11+56-sp_4096_from_bin_movbe_ofs]
        movbe   r10, QWORD PTR [r11+48-sp_4096_from_bin_movbe_ofs]
        mov     QWORD PTR [rcx+sp_4096_from_bin_movbe_ofs], rax
        mov     QWORD PTR [rcx+sp_4096_from_bin_movbe_ofs+8], r10
sp_4096_from_bin_movbe_ofs = sp_4096_from_bin_movbe_ofs + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_movbe_64_end:
        ; Signed test: loop while at least 64 bytes remain.
        cmp     r9, 63
        jg      L_4096_from_bin_movbe_64_start
        jmp     L_4096_from_bin_movbe_8_end
L_4096_from_bin_movbe_8_start:
        ; Convert one 8-byte word.
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_movbe_8_start
        ; Skip the partial word when no bytes remain.
        cmp     r9, 0
        je      L_4096_from_bin_movbe_hi_end
        ; The remaining bytes are the most significant ones at the start
        ; of a. Accumulate them big-endian into r10.
        mov     r10, 0
        mov     rax, 0
L_4096_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_movbe_hi_end:
        ; Zero any output words not yet written.
        cmp     rcx, r12
        jge     L_4096_from_bin_movbe_zero_end
L_4096_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_4096_from_bin_movbe_zero_start
L_4096_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_4096_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 512
;  * Uses the bswap instruction.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
; /* Implementation notes for sp_4096_to_bin_bswap_64:
;  *   rcx = r (64 words), rdx = a (512 output bytes).
;  *   Words are read from the top (offset 504) downwards, byte-reversed and
;  *   written to a from offset 0 upwards, two words per step.
;  *   rax and r8 are the only registers modified.
;  */
_text SEGMENT READONLY PARA
sp_4096_to_bin_bswap_64 PROC
sp_4096_to_bin_bswap_64_dst = 0
        REPEAT 32
        mov     rax, QWORD PTR [rcx+504-sp_4096_to_bin_bswap_64_dst]
        mov     r8, QWORD PTR [rcx+496-sp_4096_to_bin_bswap_64_dst]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+sp_4096_to_bin_bswap_64_dst], rax
        mov     QWORD PTR [rdx+sp_4096_to_bin_bswap_64_dst+8], r8
sp_4096_to_bin_bswap_64_dst = sp_4096_to_bin_bswap_64_dst + 16
        ENDM
        ret
sp_4096_to_bin_bswap_64 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 512
;  * Uses the movbe instruction which is optional.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  *
;  * rcx = r, rdx = a. Same traversal as the bswap variant; MOVBE performs
;  * the byte reversal as part of each load.
;  */
_text SEGMENT READONLY PARA
sp_4096_to_bin_movbe_64 PROC
sp_4096_to_bin_movbe_64_dst = 0
        REPEAT 32
        movbe   rax, QWORD PTR [rcx+504-sp_4096_to_bin_movbe_64_dst]
        movbe   r8, QWORD PTR [rcx+496-sp_4096_to_bin_movbe_64_dst]
        mov     QWORD PTR [rdx+sp_4096_to_bin_movbe_64_dst], rax
        mov     QWORD PTR [rdx+sp_4096_to_bin_movbe_64_dst+8], r8
sp_4096_to_bin_movbe_64_dst = sp_4096_to_bin_movbe_64_dst + 16
        ENDM
        ret
sp_4096_to_bin_movbe_64 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
;  *
;  * a  A single precision integer and result.
;  * b  A single precision integer.
;  *
;  * rcx = a, rdx = b, 64 words each. On return rax is 0 when the
;  * subtraction did not borrow out of the top word and all ones when it
;  * did (SBB rax, rax).
;  */
_text SEGMENT READONLY PARA
sp_4096_sub_in_place_64 PROC
        ; Word 0 starts the borrow chain.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        ; Words 1..62. r8 and r9 alternate, and each difference is stored
        ; after the next word of a has been loaded. MOV leaves the borrow
        ; flag intact.
sp_4096_sub_in_place_64_ofs = 8
        REPEAT 31
        mov     r9, QWORD PTR [rcx+sp_4096_sub_in_place_64_ofs]
        mov     QWORD PTR [rcx+sp_4096_sub_in_place_64_ofs-8], r8
        sbb     r9, QWORD PTR [rdx+sp_4096_sub_in_place_64_ofs]
        mov     r8, QWORD PTR [rcx+sp_4096_sub_in_place_64_ofs+8]
        mov     QWORD PTR [rcx+sp_4096_sub_in_place_64_ofs], r9
        sbb     r8, QWORD PTR [rdx+sp_4096_sub_in_place_64_ofs+8]
sp_4096_sub_in_place_64_ofs = sp_4096_sub_in_place_64_ofs + 16
        ENDM
        ; Word 63, then flush the last two differences.
        mov     r9, QWORD PTR [rcx+504]
        mov     QWORD PTR [rcx+496], r8
        sbb     r9, QWORD PTR [rdx+504]
        mov     QWORD PTR [rcx+504], r9
        sbb     rax, rax
        ret
sp_4096_sub_in_place_64 ENDP
_text ENDS
; /* Add b to a into r.
;    (r = a + b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  *
;  * rcx = r, rdx = a, r8 = b, 64 words each. rax returns the carry out of
;  * the top word (0 or 1).
;  */
_text SEGMENT READONLY PARA
sp_4096_add_64 PROC
        ; Add
        ; Word 0 starts the carry chain. rax is cleared before the ADD so
        ; that the XOR does not disturb the chain.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Words 1..62. r9 and r10 alternate, and each sum is stored after
        ; the next word of a has been loaded. MOV leaves the carry flag
        ; intact.
sp_4096_add_64_ofs = 8
        REPEAT 31
        mov     r10, QWORD PTR [rdx+sp_4096_add_64_ofs]
        mov     QWORD PTR [rcx+sp_4096_add_64_ofs-8], r9
        adc     r10, QWORD PTR [r8+sp_4096_add_64_ofs]
        mov     r9, QWORD PTR [rdx+sp_4096_add_64_ofs+8]
        mov     QWORD PTR [rcx+sp_4096_add_64_ofs], r10
        adc     r9, QWORD PTR [r8+sp_4096_add_64_ofs+8]
sp_4096_add_64_ofs = sp_4096_add_64_ofs + 16
        ENDM
        ; Word 63, then flush the last two sums and collect the carry.
        mov     r10, QWORD PTR [rdx+504]
        mov     QWORD PTR [rcx+496], r9
        adc     r10, QWORD PTR [r8+504]
        mov     QWORD PTR [rcx+504], r10
        adc     rax, 0
        ret
sp_4096_add_64 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
; */
;
; Implementation notes for sp_4096_mul_64 (Microsoft x64 register usage:
; rcx = r, rdx = a, r8 = b).
;
; The 64-word operands are split into 32-word halves and combined from
; three calls to sp_2048_mul_32 (presumably a 32x32-word multiply that
; writes 64 words - confirm against its definition):
;     1st call: (a_lo + a_hi) * (b_lo + b_hi)  -> stack [rsp+0]
;     2nd call:  a_hi * b_hi                   -> stack [rsp+512]
;     3rd call:  a_lo * b_lo                   -> r
;
; Stack frame (1576 bytes):
;     [rsp+0    .. rsp+511 ]  output of the 1st call
;     [rsp+512  .. rsp+1023]  output of the 2nd call
;     [rsp+1024 .. rsp+1279]  a_lo + a_hi (32 words, carry kept separately)
;     [rsp+1280 .. rsp+1535]  b_lo + b_hi (32 words, carry kept separately)
;     [rsp+1536] r   [rsp+1544] a   [rsp+1552] b
;     [rsp+1560] carry out of a_lo + a_hi
;     [rsp+1568] carry out of b_lo + b_hi
;
; No branch depends on operand data: the two carries are turned into
; all-ones/all-zero masks and applied with AND.

; Assembly-time byte offset used by the unrolled blocks below.
sp_4096_mul_64_off = 0

; Emit an unrolled multi-word carry/borrow chain:
;     dst_reg[i] = lhs_reg[i] <op> rhs_reg[i]      for i = 0 .. words-1
; op_first is the opcode for word 0 (add / sub), op_next the opcode that
; consumes the running carry for the remaining words (adc / sbb).
; Scratch registers rotate rax -> r9 -> r10, and the load of word i+1 is
; issued before the store of word i, so dst_reg may equal lhs_reg.
; Only mov and the chain opcodes are emitted, so CF after the last word is
; still live for the caller.
sp_4096_mul_64_chain MACRO op_first, op_next, dst_reg, lhs_reg, rhs_reg, words
        sp_4096_mul_64_off = 0
        mov     rax, QWORD PTR [lhs_reg]
        op_first rax, QWORD PTR [rhs_reg]
        REPT    (words - 1) / 3
        mov     r9, QWORD PTR [lhs_reg+sp_4096_mul_64_off+8]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off], rax
        op_next r9, QWORD PTR [rhs_reg+sp_4096_mul_64_off+8]
        mov     r10, QWORD PTR [lhs_reg+sp_4096_mul_64_off+16]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off+8], r9
        op_next r10, QWORD PTR [rhs_reg+sp_4096_mul_64_off+16]
        mov     rax, QWORD PTR [lhs_reg+sp_4096_mul_64_off+24]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off+16], r10
        op_next rax, QWORD PTR [rhs_reg+sp_4096_mul_64_off+24]
        sp_4096_mul_64_off = sp_4096_mul_64_off + 24
        ENDM
        ; rax still holds the last word produced above; finish the 0, 1 or 2
        ; words that did not fit into a full group of three.
        IF ((words - 1) MOD 3) EQ 0
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off], rax
        ELSEIF ((words - 1) MOD 3) EQ 1
        mov     r9, QWORD PTR [lhs_reg+sp_4096_mul_64_off+8]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off], rax
        op_next r9, QWORD PTR [rhs_reg+sp_4096_mul_64_off+8]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off+8], r9
        ELSE
        mov     r9, QWORD PTR [lhs_reg+sp_4096_mul_64_off+8]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off], rax
        op_next r9, QWORD PTR [rhs_reg+sp_4096_mul_64_off+8]
        mov     r10, QWORD PTR [lhs_reg+sp_4096_mul_64_off+16]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off+8], r9
        op_next r10, QWORD PTR [rhs_reg+sp_4096_mul_64_off+16]
        mov     QWORD PTR [dst_reg+sp_4096_mul_64_off+16], r10
        ENDIF
ENDM

_text SEGMENT READONLY PARA
sp_4096_mul_64 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        sub     rsp, 1576
        ; Keep the three arguments; they are reloaded around the calls.
        mov     QWORD PTR [rsp+1536], rcx
        mov     QWORD PTR [rsp+1544], rdx
        mov     QWORD PTR [rsp+1552], r8

        ; [rsp+1024] = a_lo + a_hi, carry out -> r15 -> [rsp+1560]
        lea     r12, QWORD PTR [rsp+1024]
        lea     r14, QWORD PTR [rdx+256]
        xor     r15, r15
        sp_4096_mul_64_chain add, adc, r12, rdx, r14, 32
        adc     r15, 0
        mov     QWORD PTR [rsp+1560], r15

        ; [rsp+1280] = b_lo + b_hi, carry out -> rdi -> [rsp+1568]
        lea     r13, QWORD PTR [rsp+1280]
        lea     r14, QWORD PTR [r8+256]
        xor     rdi, rdi
        sp_4096_mul_64_chain add, adc, r13, r8, r14, 32
        adc     rdi, 0
        mov     QWORD PTR [rsp+1568], rdi

        ; 1st call: [rsp+0] <- sp_2048_mul_32(a_lo + a_hi, b_lo + b_hi)
        mov     r8, r13
        mov     rdx, r12
        mov     rcx, rsp
        call    sp_2048_mul_32
        ; 2nd call: [rsp+512] <- sp_2048_mul_32(a + 256, b + 256)
        mov     r8, QWORD PTR [rsp+1552]
        mov     rdx, QWORD PTR [rsp+1544]
        lea     rcx, QWORD PTR [rsp+512]
        add     r8, 256
        add     rdx, 256
        call    sp_2048_mul_32
        ; 3rd call: r <- sp_2048_mul_32(a, b)
        mov     r8, QWORD PTR [rsp+1552]
        mov     rdx, QWORD PTR [rsp+1544]
        mov     rcx, QWORD PTR [rsp+1536]
        call    sp_2048_mul_32
IFDEF _WIN64
        ; rcx, rdx and r8 are volatile across the call; rcx (= r) is read
        ; again below.
        mov     r8, QWORD PTR [rsp+1552]
        mov     rdx, QWORD PTR [rsp+1544]
        mov     rcx, QWORD PTR [rsp+1536]
ENDIF
        mov     r15, QWORD PTR [rsp+1560]
        mov     rdi, QWORD PTR [rsp+1568]
        mov     rsi, QWORD PTR [rsp+1536]
        mov     r11, r15
        lea     r12, QWORD PTR [rsp+1024]
        lea     r13, QWORD PTR [rsp+1280]
        and     r11, rdi                ; r11 = carry_a AND carry_b
        neg     r15                     ; r15 = 0 or all ones (carry_a mask)
        neg     rdi                     ; rdi = 0 or all ones (carry_b mask)
        add     rsi, 512                ; rsi = r + 512 bytes

        ; Keep (a_lo + a_hi) only if the b sum carried and (b_lo + b_hi)
        ; only if the a sum carried; otherwise zero them.
        sp_4096_mul_64_off = 0
        REPT    32
        mov     rax, QWORD PTR [r12+sp_4096_mul_64_off]
        mov     r9, QWORD PTR [r13+sp_4096_mul_64_off]
        and     rax, rdi
        and     r9, r15
        mov     QWORD PTR [r12+sp_4096_mul_64_off], rax
        mov     QWORD PTR [r13+sp_4096_mul_64_off], r9
        sp_4096_mul_64_off = sp_4096_mul_64_off + 8
        ENDM

        ; r[64..95] = sum of the two masked values; carry accumulates in r11.
        sp_4096_mul_64_chain add, adc, rsi, r12, r13, 32
        adc     r11, 0

        ; [rsp+0] -= [rsp+512] (64 words), borrow taken from r11.
        lea     r13, QWORD PTR [rsp+512]
        mov     r12, rsp
        sp_4096_mul_64_chain sub, sbb, r12, r12, r13, 64
        sbb     r11, 0

        ; [rsp+0] -= r[0..63] (64 words), borrow taken from r11.
        sp_4096_mul_64_chain sub, sbb, r12, r12, rcx, 64
        sbb     r11, 0

        ; r[32..95] += [rsp+0] (64 words); the running top word goes to r[96].
        sub     rsi, 256
        sp_4096_mul_64_chain add, adc, rsi, rsi, r12, 64
        adc     r11, 0
        mov     QWORD PTR [rcx+768], r11
        add     rsi, 256

        ; r[64..96] += first 33 words of [rsp+512] ...
        sp_4096_mul_64_chain add, adc, rsi, rsi, r13, 33

        ; ... and r[97..127] = remaining 31 words of [rsp+512] plus the
        ; carry that is still in CF from the chain above.
        mov     rax, QWORD PTR [r13+264]
        adc     rax, 0
        sp_4096_mul_64_off = 264
        REPT    10
        mov     r9, QWORD PTR [r13+sp_4096_mul_64_off+8]
        mov     QWORD PTR [rsi+sp_4096_mul_64_off], rax
        adc     r9, 0
        mov     r10, QWORD PTR [r13+sp_4096_mul_64_off+16]
        mov     QWORD PTR [rsi+sp_4096_mul_64_off+8], r9
        adc     r10, 0
        mov     rax, QWORD PTR [r13+sp_4096_mul_64_off+24]
        mov     QWORD PTR [rsi+sp_4096_mul_64_off+16], r10
        adc     rax, 0
        sp_4096_mul_64_off = sp_4096_mul_64_off + 24
        ENDM
        mov     QWORD PTR [rsi+504], rax

        add     rsp, 1576
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_4096_mul_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
; */ _text SEGMENT READONLY PARA sp_4096_mul_avx2_64 PROC push r12 push r13 push r14 push r15 push rdi push rsi sub rsp, 1576 mov QWORD PTR [rsp+1536], rcx mov QWORD PTR [rsp+1544], rdx mov QWORD PTR [rsp+1552], r8 lea r12, QWORD PTR [rsp+1024] lea r14, QWORD PTR [rdx+256] ; Add mov rax, QWORD PTR [rdx] xor r15, r15 add rax, QWORD PTR [r14] mov r9, QWORD PTR [rdx+8] mov QWORD PTR [r12], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [rdx+16] mov QWORD PTR [r12+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [rdx+24] mov QWORD PTR [r12+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [rdx+32] mov QWORD PTR [r12+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [rdx+40] mov QWORD PTR [r12+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [rdx+48] mov QWORD PTR [r12+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [rdx+56] mov QWORD PTR [r12+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [rdx+64] mov QWORD PTR [r12+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [rdx+72] mov QWORD PTR [r12+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [rdx+80] mov QWORD PTR [r12+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR [rdx+88] mov QWORD PTR [r12+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [rdx+96] mov QWORD PTR [r12+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [rdx+104] mov QWORD PTR [r12+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [rdx+112] mov QWORD PTR [r12+104], r9 adc r10, QWORD PTR [r14+112] mov rax, QWORD PTR [rdx+120] mov QWORD PTR [r12+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [rdx+128] mov QWORD PTR [r12+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [rdx+136] mov QWORD PTR [r12+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [rdx+144] mov QWORD PTR [r12+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [rdx+152] mov QWORD PTR [r12+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [rdx+160] mov QWORD PTR [r12+152], r9 adc r10, 
QWORD PTR [r14+160] mov rax, QWORD PTR [rdx+168] mov QWORD PTR [r12+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [rdx+176] mov QWORD PTR [r12+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [rdx+184] mov QWORD PTR [r12+176], r9 adc r10, QWORD PTR [r14+184] mov rax, QWORD PTR [rdx+192] mov QWORD PTR [r12+184], r10 adc rax, QWORD PTR [r14+192] mov r9, QWORD PTR [rdx+200] mov QWORD PTR [r12+192], rax adc r9, QWORD PTR [r14+200] mov r10, QWORD PTR [rdx+208] mov QWORD PTR [r12+200], r9 adc r10, QWORD PTR [r14+208] mov rax, QWORD PTR [rdx+216] mov QWORD PTR [r12+208], r10 adc rax, QWORD PTR [r14+216] mov r9, QWORD PTR [rdx+224] mov QWORD PTR [r12+216], rax adc r9, QWORD PTR [r14+224] mov r10, QWORD PTR [rdx+232] mov QWORD PTR [r12+224], r9 adc r10, QWORD PTR [r14+232] mov rax, QWORD PTR [rdx+240] mov QWORD PTR [r12+232], r10 adc rax, QWORD PTR [r14+240] mov r9, QWORD PTR [rdx+248] mov QWORD PTR [r12+240], rax adc r9, QWORD PTR [r14+248] mov QWORD PTR [r12+248], r9 adc r15, 0 mov QWORD PTR [rsp+1560], r15 lea r13, QWORD PTR [rsp+1280] lea r14, QWORD PTR [r8+256] ; Add mov rax, QWORD PTR [r8] xor rdi, rdi add rax, QWORD PTR [r14] mov r9, QWORD PTR [r8+8] mov QWORD PTR [r13], rax adc r9, QWORD PTR [r14+8] mov r10, QWORD PTR [r8+16] mov QWORD PTR [r13+8], r9 adc r10, QWORD PTR [r14+16] mov rax, QWORD PTR [r8+24] mov QWORD PTR [r13+16], r10 adc rax, QWORD PTR [r14+24] mov r9, QWORD PTR [r8+32] mov QWORD PTR [r13+24], rax adc r9, QWORD PTR [r14+32] mov r10, QWORD PTR [r8+40] mov QWORD PTR [r13+32], r9 adc r10, QWORD PTR [r14+40] mov rax, QWORD PTR [r8+48] mov QWORD PTR [r13+40], r10 adc rax, QWORD PTR [r14+48] mov r9, QWORD PTR [r8+56] mov QWORD PTR [r13+48], rax adc r9, QWORD PTR [r14+56] mov r10, QWORD PTR [r8+64] mov QWORD PTR [r13+56], r9 adc r10, QWORD PTR [r14+64] mov rax, QWORD PTR [r8+72] mov QWORD PTR [r13+64], r10 adc rax, QWORD PTR [r14+72] mov r9, QWORD PTR [r8+80] mov QWORD PTR [r13+72], rax adc r9, QWORD PTR [r14+80] mov r10, QWORD PTR 
[r8+88] mov QWORD PTR [r13+80], r9 adc r10, QWORD PTR [r14+88] mov rax, QWORD PTR [r8+96] mov QWORD PTR [r13+88], r10 adc rax, QWORD PTR [r14+96] mov r9, QWORD PTR [r8+104] mov QWORD PTR [r13+96], rax adc r9, QWORD PTR [r14+104] mov r10, QWORD PTR [r8+112] mov QWORD PTR [r13+104], r9 adc r10, QWORD PTR [r14+112] mov rax, QWORD PTR [r8+120] mov QWORD PTR [r13+112], r10 adc rax, QWORD PTR [r14+120] mov r9, QWORD PTR [r8+128] mov QWORD PTR [r13+120], rax adc r9, QWORD PTR [r14+128] mov r10, QWORD PTR [r8+136] mov QWORD PTR [r13+128], r9 adc r10, QWORD PTR [r14+136] mov rax, QWORD PTR [r8+144] mov QWORD PTR [r13+136], r10 adc rax, QWORD PTR [r14+144] mov r9, QWORD PTR [r8+152] mov QWORD PTR [r13+144], rax adc r9, QWORD PTR [r14+152] mov r10, QWORD PTR [r8+160] mov QWORD PTR [r13+152], r9 adc r10, QWORD PTR [r14+160] mov rax, QWORD PTR [r8+168] mov QWORD PTR [r13+160], r10 adc rax, QWORD PTR [r14+168] mov r9, QWORD PTR [r8+176] mov QWORD PTR [r13+168], rax adc r9, QWORD PTR [r14+176] mov r10, QWORD PTR [r8+184] mov QWORD PTR [r13+176], r9 adc r10, QWORD PTR [r14+184] mov rax, QWORD PTR [r8+192] mov QWORD PTR [r13+184], r10 adc rax, QWORD PTR [r14+192] mov r9, QWORD PTR [r8+200] mov QWORD PTR [r13+192], rax adc r9, QWORD PTR [r14+200] mov r10, QWORD PTR [r8+208] mov QWORD PTR [r13+200], r9 adc r10, QWORD PTR [r14+208] mov rax, QWORD PTR [r8+216] mov QWORD PTR [r13+208], r10 adc rax, QWORD PTR [r14+216] mov r9, QWORD PTR [r8+224] mov QWORD PTR [r13+216], rax adc r9, QWORD PTR [r14+224] mov r10, QWORD PTR [r8+232] mov QWORD PTR [r13+224], r9 adc r10, QWORD PTR [r14+232] mov rax, QWORD PTR [r8+240] mov QWORD PTR [r13+232], r10 adc rax, QWORD PTR [r14+240] mov r9, QWORD PTR [r8+248] mov QWORD PTR [r13+240], rax adc r9, QWORD PTR [r14+248] mov QWORD PTR [r13+248], r9 adc rdi, 0 mov QWORD PTR [rsp+1568], rdi mov r8, r13 mov rdx, r12 mov rcx, rsp call sp_2048_mul_avx2_32 mov r8, QWORD PTR [rsp+1552] mov rdx, QWORD PTR [rsp+1544] lea rcx, QWORD PTR [rsp+512] add r8, 256 add rdx, 
256 call sp_2048_mul_avx2_32 mov r8, QWORD PTR [rsp+1552] mov rdx, QWORD PTR [rsp+1544] mov rcx, QWORD PTR [rsp+1536] call sp_2048_mul_avx2_32 IFDEF _WIN64 mov r8, QWORD PTR [rsp+1552] mov rdx, QWORD PTR [rsp+1544] mov rcx, QWORD PTR [rsp+1536] ENDIF mov r15, QWORD PTR [rsp+1560] mov rdi, QWORD PTR [rsp+1568] mov rsi, QWORD PTR [rsp+1536] mov r11, r15 lea r12, QWORD PTR [rsp+1024] lea r13, QWORD PTR [rsp+1280] and r11, rdi neg r15 neg rdi add rsi, 512 mov rax, QWORD PTR [r12] mov r9, QWORD PTR [r13] pext rax, rax, rdi pext r9, r9, r15 add rax, r9 mov r9, QWORD PTR [r12+8] mov r10, QWORD PTR [r13+8] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi], rax adc r9, r10 mov r10, QWORD PTR [r12+16] mov rax, QWORD PTR [r13+16] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+8], r9 adc r10, rax mov rax, QWORD PTR [r12+24] mov r9, QWORD PTR [r13+24] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+16], r10 adc rax, r9 mov r9, QWORD PTR [r12+32] mov r10, QWORD PTR [r13+32] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+24], rax adc r9, r10 mov r10, QWORD PTR [r12+40] mov rax, QWORD PTR [r13+40] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+32], r9 adc r10, rax mov rax, QWORD PTR [r12+48] mov r9, QWORD PTR [r13+48] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+40], r10 adc rax, r9 mov r9, QWORD PTR [r12+56] mov r10, QWORD PTR [r13+56] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+48], rax adc r9, r10 mov r10, QWORD PTR [r12+64] mov rax, QWORD PTR [r13+64] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+56], r9 adc r10, rax mov rax, QWORD PTR [r12+72] mov r9, QWORD PTR [r13+72] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+64], r10 adc rax, r9 mov r9, QWORD PTR [r12+80] mov r10, QWORD PTR [r13+80] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+72], rax adc r9, r10 mov r10, QWORD PTR [r12+88] mov rax, QWORD PTR [r13+88] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+80], r9 adc r10, rax 
mov rax, QWORD PTR [r12+96] mov r9, QWORD PTR [r13+96] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+88], r10 adc rax, r9 mov r9, QWORD PTR [r12+104] mov r10, QWORD PTR [r13+104] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+96], rax adc r9, r10 mov r10, QWORD PTR [r12+112] mov rax, QWORD PTR [r13+112] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+104], r9 adc r10, rax mov rax, QWORD PTR [r12+120] mov r9, QWORD PTR [r13+120] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+112], r10 adc rax, r9 mov r9, QWORD PTR [r12+128] mov r10, QWORD PTR [r13+128] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+120], rax adc r9, r10 mov r10, QWORD PTR [r12+136] mov rax, QWORD PTR [r13+136] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+128], r9 adc r10, rax mov rax, QWORD PTR [r12+144] mov r9, QWORD PTR [r13+144] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+136], r10 adc rax, r9 mov r9, QWORD PTR [r12+152] mov r10, QWORD PTR [r13+152] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+144], rax adc r9, r10 mov r10, QWORD PTR [r12+160] mov rax, QWORD PTR [r13+160] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+152], r9 adc r10, rax mov rax, QWORD PTR [r12+168] mov r9, QWORD PTR [r13+168] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+160], r10 adc rax, r9 mov r9, QWORD PTR [r12+176] mov r10, QWORD PTR [r13+176] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+168], rax adc r9, r10 mov r10, QWORD PTR [r12+184] mov rax, QWORD PTR [r13+184] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+176], r9 adc r10, rax mov rax, QWORD PTR [r12+192] mov r9, QWORD PTR [r13+192] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+184], r10 adc rax, r9 mov r9, QWORD PTR [r12+200] mov r10, QWORD PTR [r13+200] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+192], rax adc r9, r10 mov r10, QWORD PTR [r12+208] mov rax, QWORD PTR [r13+208] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR 
[rsi+200], r9 adc r10, rax mov rax, QWORD PTR [r12+216] mov r9, QWORD PTR [r13+216] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+208], r10 adc rax, r9 mov r9, QWORD PTR [r12+224] mov r10, QWORD PTR [r13+224] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+216], rax adc r9, r10 mov r10, QWORD PTR [r12+232] mov rax, QWORD PTR [r13+232] pext r10, r10, rdi pext rax, rax, r15 mov QWORD PTR [rsi+224], r9 adc r10, rax mov rax, QWORD PTR [r12+240] mov r9, QWORD PTR [r13+240] pext rax, rax, rdi pext r9, r9, r15 mov QWORD PTR [rsi+232], r10 adc rax, r9 mov r9, QWORD PTR [r12+248] mov r10, QWORD PTR [r13+248] pext r9, r9, rdi pext r10, r10, r15 mov QWORD PTR [rsi+240], rax adc r9, r10 mov QWORD PTR [rsi+248], r9 adc r11, 0 lea r13, QWORD PTR [rsp+512] mov r12, rsp mov rax, QWORD PTR [r12] sub rax, QWORD PTR [r13] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [r13+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [r13+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [r13+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [r13+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [r13+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [r13+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [r13+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [r13+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [r13+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [r13+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [r13+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [r13+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [r13+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [r13+112] mov rax, 
QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [r13+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [r13+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [r13+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [r13+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [r13+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [r13+160] mov rax, QWORD PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [r13+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [r13+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [r13+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [r13+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [r13+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [r13+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [r13+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [r13+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [r13+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [r13+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [r13+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [r13+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [r13+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [r13+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR [r12+272], r9 sbb r10, QWORD PTR [r13+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], r10 sbb rax, QWORD PTR [r13+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [r13+296] mov r10, QWORD PTR [r12+304] 
mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [r13+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [r13+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [r13+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [r13+328] mov rax, QWORD PTR [r12+336] mov QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [r13+336] mov r9, QWORD PTR [r12+344] mov QWORD PTR [r12+336], rax sbb r9, QWORD PTR [r13+344] mov r10, QWORD PTR [r12+352] mov QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [r13+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [r13+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [r13+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [r13+376] mov rax, QWORD PTR [r12+384] mov QWORD PTR [r12+376], r10 sbb rax, QWORD PTR [r13+384] mov r9, QWORD PTR [r12+392] mov QWORD PTR [r12+384], rax sbb r9, QWORD PTR [r13+392] mov r10, QWORD PTR [r12+400] mov QWORD PTR [r12+392], r9 sbb r10, QWORD PTR [r13+400] mov rax, QWORD PTR [r12+408] mov QWORD PTR [r12+400], r10 sbb rax, QWORD PTR [r13+408] mov r9, QWORD PTR [r12+416] mov QWORD PTR [r12+408], rax sbb r9, QWORD PTR [r13+416] mov r10, QWORD PTR [r12+424] mov QWORD PTR [r12+416], r9 sbb r10, QWORD PTR [r13+424] mov rax, QWORD PTR [r12+432] mov QWORD PTR [r12+424], r10 sbb rax, QWORD PTR [r13+432] mov r9, QWORD PTR [r12+440] mov QWORD PTR [r12+432], rax sbb r9, QWORD PTR [r13+440] mov r10, QWORD PTR [r12+448] mov QWORD PTR [r12+440], r9 sbb r10, QWORD PTR [r13+448] mov rax, QWORD PTR [r12+456] mov QWORD PTR [r12+448], r10 sbb rax, QWORD PTR [r13+456] mov r9, QWORD PTR [r12+464] mov QWORD PTR [r12+456], rax sbb r9, QWORD PTR [r13+464] mov r10, QWORD PTR [r12+472] mov QWORD PTR [r12+464], r9 sbb r10, QWORD PTR [r13+472] mov rax, QWORD PTR [r12+480] mov QWORD PTR [r12+472], r10 sbb rax, QWORD PTR [r13+480] mov r9, QWORD PTR [r12+488] mov QWORD PTR 
[r12+480], rax sbb r9, QWORD PTR [r13+488] mov r10, QWORD PTR [r12+496] mov QWORD PTR [r12+488], r9 sbb r10, QWORD PTR [r13+496] mov rax, QWORD PTR [r12+504] mov QWORD PTR [r12+496], r10 sbb rax, QWORD PTR [r13+504] mov QWORD PTR [r12+504], rax sbb r11, 0 mov rax, QWORD PTR [r12] sub rax, QWORD PTR [rcx] mov r9, QWORD PTR [r12+8] mov QWORD PTR [r12], rax sbb r9, QWORD PTR [rcx+8] mov r10, QWORD PTR [r12+16] mov QWORD PTR [r12+8], r9 sbb r10, QWORD PTR [rcx+16] mov rax, QWORD PTR [r12+24] mov QWORD PTR [r12+16], r10 sbb rax, QWORD PTR [rcx+24] mov r9, QWORD PTR [r12+32] mov QWORD PTR [r12+24], rax sbb r9, QWORD PTR [rcx+32] mov r10, QWORD PTR [r12+40] mov QWORD PTR [r12+32], r9 sbb r10, QWORD PTR [rcx+40] mov rax, QWORD PTR [r12+48] mov QWORD PTR [r12+40], r10 sbb rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [r12+56] mov QWORD PTR [r12+48], rax sbb r9, QWORD PTR [rcx+56] mov r10, QWORD PTR [r12+64] mov QWORD PTR [r12+56], r9 sbb r10, QWORD PTR [rcx+64] mov rax, QWORD PTR [r12+72] mov QWORD PTR [r12+64], r10 sbb rax, QWORD PTR [rcx+72] mov r9, QWORD PTR [r12+80] mov QWORD PTR [r12+72], rax sbb r9, QWORD PTR [rcx+80] mov r10, QWORD PTR [r12+88] mov QWORD PTR [r12+80], r9 sbb r10, QWORD PTR [rcx+88] mov rax, QWORD PTR [r12+96] mov QWORD PTR [r12+88], r10 sbb rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [r12+104] mov QWORD PTR [r12+96], rax sbb r9, QWORD PTR [rcx+104] mov r10, QWORD PTR [r12+112] mov QWORD PTR [r12+104], r9 sbb r10, QWORD PTR [rcx+112] mov rax, QWORD PTR [r12+120] mov QWORD PTR [r12+112], r10 sbb rax, QWORD PTR [rcx+120] mov r9, QWORD PTR [r12+128] mov QWORD PTR [r12+120], rax sbb r9, QWORD PTR [rcx+128] mov r10, QWORD PTR [r12+136] mov QWORD PTR [r12+128], r9 sbb r10, QWORD PTR [rcx+136] mov rax, QWORD PTR [r12+144] mov QWORD PTR [r12+136], r10 sbb rax, QWORD PTR [rcx+144] mov r9, QWORD PTR [r12+152] mov QWORD PTR [r12+144], rax sbb r9, QWORD PTR [rcx+152] mov r10, QWORD PTR [r12+160] mov QWORD PTR [r12+152], r9 sbb r10, QWORD PTR [rcx+160] mov rax, QWORD 
PTR [r12+168] mov QWORD PTR [r12+160], r10 sbb rax, QWORD PTR [rcx+168] mov r9, QWORD PTR [r12+176] mov QWORD PTR [r12+168], rax sbb r9, QWORD PTR [rcx+176] mov r10, QWORD PTR [r12+184] mov QWORD PTR [r12+176], r9 sbb r10, QWORD PTR [rcx+184] mov rax, QWORD PTR [r12+192] mov QWORD PTR [r12+184], r10 sbb rax, QWORD PTR [rcx+192] mov r9, QWORD PTR [r12+200] mov QWORD PTR [r12+192], rax sbb r9, QWORD PTR [rcx+200] mov r10, QWORD PTR [r12+208] mov QWORD PTR [r12+200], r9 sbb r10, QWORD PTR [rcx+208] mov rax, QWORD PTR [r12+216] mov QWORD PTR [r12+208], r10 sbb rax, QWORD PTR [rcx+216] mov r9, QWORD PTR [r12+224] mov QWORD PTR [r12+216], rax sbb r9, QWORD PTR [rcx+224] mov r10, QWORD PTR [r12+232] mov QWORD PTR [r12+224], r9 sbb r10, QWORD PTR [rcx+232] mov rax, QWORD PTR [r12+240] mov QWORD PTR [r12+232], r10 sbb rax, QWORD PTR [rcx+240] mov r9, QWORD PTR [r12+248] mov QWORD PTR [r12+240], rax sbb r9, QWORD PTR [rcx+248] mov r10, QWORD PTR [r12+256] mov QWORD PTR [r12+248], r9 sbb r10, QWORD PTR [rcx+256] mov rax, QWORD PTR [r12+264] mov QWORD PTR [r12+256], r10 sbb rax, QWORD PTR [rcx+264] mov r9, QWORD PTR [r12+272] mov QWORD PTR [r12+264], rax sbb r9, QWORD PTR [rcx+272] mov r10, QWORD PTR [r12+280] mov QWORD PTR [r12+272], r9 sbb r10, QWORD PTR [rcx+280] mov rax, QWORD PTR [r12+288] mov QWORD PTR [r12+280], r10 sbb rax, QWORD PTR [rcx+288] mov r9, QWORD PTR [r12+296] mov QWORD PTR [r12+288], rax sbb r9, QWORD PTR [rcx+296] mov r10, QWORD PTR [r12+304] mov QWORD PTR [r12+296], r9 sbb r10, QWORD PTR [rcx+304] mov rax, QWORD PTR [r12+312] mov QWORD PTR [r12+304], r10 sbb rax, QWORD PTR [rcx+312] mov r9, QWORD PTR [r12+320] mov QWORD PTR [r12+312], rax sbb r9, QWORD PTR [rcx+320] mov r10, QWORD PTR [r12+328] mov QWORD PTR [r12+320], r9 sbb r10, QWORD PTR [rcx+328] mov rax, QWORD PTR [r12+336] mov QWORD PTR [r12+328], r10 sbb rax, QWORD PTR [rcx+336] mov r9, QWORD PTR [r12+344] mov QWORD PTR [r12+336], rax sbb r9, QWORD PTR [rcx+344] mov r10, QWORD PTR [r12+352] mov 
QWORD PTR [r12+344], r9 sbb r10, QWORD PTR [rcx+352] mov rax, QWORD PTR [r12+360] mov QWORD PTR [r12+352], r10 sbb rax, QWORD PTR [rcx+360] mov r9, QWORD PTR [r12+368] mov QWORD PTR [r12+360], rax sbb r9, QWORD PTR [rcx+368] mov r10, QWORD PTR [r12+376] mov QWORD PTR [r12+368], r9 sbb r10, QWORD PTR [rcx+376] mov rax, QWORD PTR [r12+384] mov QWORD PTR [r12+376], r10 sbb rax, QWORD PTR [rcx+384] mov r9, QWORD PTR [r12+392] mov QWORD PTR [r12+384], rax sbb r9, QWORD PTR [rcx+392] mov r10, QWORD PTR [r12+400] mov QWORD PTR [r12+392], r9 sbb r10, QWORD PTR [rcx+400] mov rax, QWORD PTR [r12+408] mov QWORD PTR [r12+400], r10 sbb rax, QWORD PTR [rcx+408] mov r9, QWORD PTR [r12+416] mov QWORD PTR [r12+408], rax sbb r9, QWORD PTR [rcx+416] mov r10, QWORD PTR [r12+424] mov QWORD PTR [r12+416], r9 sbb r10, QWORD PTR [rcx+424] mov rax, QWORD PTR [r12+432] mov QWORD PTR [r12+424], r10 sbb rax, QWORD PTR [rcx+432] mov r9, QWORD PTR [r12+440] mov QWORD PTR [r12+432], rax sbb r9, QWORD PTR [rcx+440] mov r10, QWORD PTR [r12+448] mov QWORD PTR [r12+440], r9 sbb r10, QWORD PTR [rcx+448] mov rax, QWORD PTR [r12+456] mov QWORD PTR [r12+448], r10 sbb rax, QWORD PTR [rcx+456] mov r9, QWORD PTR [r12+464] mov QWORD PTR [r12+456], rax sbb r9, QWORD PTR [rcx+464] mov r10, QWORD PTR [r12+472] mov QWORD PTR [r12+464], r9 sbb r10, QWORD PTR [rcx+472] mov rax, QWORD PTR [r12+480] mov QWORD PTR [r12+472], r10 sbb rax, QWORD PTR [rcx+480] mov r9, QWORD PTR [r12+488] mov QWORD PTR [r12+480], rax sbb r9, QWORD PTR [rcx+488] mov r10, QWORD PTR [r12+496] mov QWORD PTR [r12+488], r9 sbb r10, QWORD PTR [rcx+496] mov rax, QWORD PTR [r12+504] mov QWORD PTR [r12+496], r10 sbb rax, QWORD PTR [rcx+504] mov QWORD PTR [r12+504], rax sbb r11, 0 sub rsi, 256 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r12] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r12+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r12+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR 
[rsi+16], r10 adc rax, QWORD PTR [r12+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r12+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r12+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r12+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r12+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r12+64] mov rax, QWORD PTR [rsi+72] mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r12+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r12+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r12+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r12+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r12+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r12+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r12+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r12+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r12+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r12+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r12+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r12+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r12+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r12+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r12+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r12+192] mov r9, QWORD PTR [rsi+200] mov QWORD PTR [rsi+192], rax adc r9, QWORD PTR [r12+200] mov r10, QWORD PTR [rsi+208] mov QWORD PTR [rsi+200], r9 adc r10, QWORD PTR [r12+208] mov rax, 
QWORD PTR [rsi+216] mov QWORD PTR [rsi+208], r10 adc rax, QWORD PTR [r12+216] mov r9, QWORD PTR [rsi+224] mov QWORD PTR [rsi+216], rax adc r9, QWORD PTR [r12+224] mov r10, QWORD PTR [rsi+232] mov QWORD PTR [rsi+224], r9 adc r10, QWORD PTR [r12+232] mov rax, QWORD PTR [rsi+240] mov QWORD PTR [rsi+232], r10 adc rax, QWORD PTR [r12+240] mov r9, QWORD PTR [rsi+248] mov QWORD PTR [rsi+240], rax adc r9, QWORD PTR [r12+248] mov r10, QWORD PTR [rsi+256] mov QWORD PTR [rsi+248], r9 adc r10, QWORD PTR [r12+256] mov rax, QWORD PTR [rsi+264] mov QWORD PTR [rsi+256], r10 adc rax, QWORD PTR [r12+264] mov r9, QWORD PTR [rsi+272] mov QWORD PTR [rsi+264], rax adc r9, QWORD PTR [r12+272] mov r10, QWORD PTR [rsi+280] mov QWORD PTR [rsi+272], r9 adc r10, QWORD PTR [r12+280] mov rax, QWORD PTR [rsi+288] mov QWORD PTR [rsi+280], r10 adc rax, QWORD PTR [r12+288] mov r9, QWORD PTR [rsi+296] mov QWORD PTR [rsi+288], rax adc r9, QWORD PTR [r12+296] mov r10, QWORD PTR [rsi+304] mov QWORD PTR [rsi+296], r9 adc r10, QWORD PTR [r12+304] mov rax, QWORD PTR [rsi+312] mov QWORD PTR [rsi+304], r10 adc rax, QWORD PTR [r12+312] mov r9, QWORD PTR [rsi+320] mov QWORD PTR [rsi+312], rax adc r9, QWORD PTR [r12+320] mov r10, QWORD PTR [rsi+328] mov QWORD PTR [rsi+320], r9 adc r10, QWORD PTR [r12+328] mov rax, QWORD PTR [rsi+336] mov QWORD PTR [rsi+328], r10 adc rax, QWORD PTR [r12+336] mov r9, QWORD PTR [rsi+344] mov QWORD PTR [rsi+336], rax adc r9, QWORD PTR [r12+344] mov r10, QWORD PTR [rsi+352] mov QWORD PTR [rsi+344], r9 adc r10, QWORD PTR [r12+352] mov rax, QWORD PTR [rsi+360] mov QWORD PTR [rsi+352], r10 adc rax, QWORD PTR [r12+360] mov r9, QWORD PTR [rsi+368] mov QWORD PTR [rsi+360], rax adc r9, QWORD PTR [r12+368] mov r10, QWORD PTR [rsi+376] mov QWORD PTR [rsi+368], r9 adc r10, QWORD PTR [r12+376] mov rax, QWORD PTR [rsi+384] mov QWORD PTR [rsi+376], r10 adc rax, QWORD PTR [r12+384] mov r9, QWORD PTR [rsi+392] mov QWORD PTR [rsi+384], rax adc r9, QWORD PTR [r12+392] mov r10, QWORD PTR [rsi+400] 
mov QWORD PTR [rsi+392], r9 adc r10, QWORD PTR [r12+400] mov rax, QWORD PTR [rsi+408] mov QWORD PTR [rsi+400], r10 adc rax, QWORD PTR [r12+408] mov r9, QWORD PTR [rsi+416] mov QWORD PTR [rsi+408], rax adc r9, QWORD PTR [r12+416] mov r10, QWORD PTR [rsi+424] mov QWORD PTR [rsi+416], r9 adc r10, QWORD PTR [r12+424] mov rax, QWORD PTR [rsi+432] mov QWORD PTR [rsi+424], r10 adc rax, QWORD PTR [r12+432] mov r9, QWORD PTR [rsi+440] mov QWORD PTR [rsi+432], rax adc r9, QWORD PTR [r12+440] mov r10, QWORD PTR [rsi+448] mov QWORD PTR [rsi+440], r9 adc r10, QWORD PTR [r12+448] mov rax, QWORD PTR [rsi+456] mov QWORD PTR [rsi+448], r10 adc rax, QWORD PTR [r12+456] mov r9, QWORD PTR [rsi+464] mov QWORD PTR [rsi+456], rax adc r9, QWORD PTR [r12+464] mov r10, QWORD PTR [rsi+472] mov QWORD PTR [rsi+464], r9 adc r10, QWORD PTR [r12+472] mov rax, QWORD PTR [rsi+480] mov QWORD PTR [rsi+472], r10 adc rax, QWORD PTR [r12+480] mov r9, QWORD PTR [rsi+488] mov QWORD PTR [rsi+480], rax adc r9, QWORD PTR [r12+488] mov r10, QWORD PTR [rsi+496] mov QWORD PTR [rsi+488], r9 adc r10, QWORD PTR [r12+496] mov rax, QWORD PTR [rsi+504] mov QWORD PTR [rsi+496], r10 adc rax, QWORD PTR [r12+504] mov QWORD PTR [rsi+504], rax adc r11, 0 mov QWORD PTR [rcx+768], r11 add rsi, 256 ; Add mov rax, QWORD PTR [rsi] add rax, QWORD PTR [r13] mov r9, QWORD PTR [rsi+8] mov QWORD PTR [rsi], rax adc r9, QWORD PTR [r13+8] mov r10, QWORD PTR [rsi+16] mov QWORD PTR [rsi+8], r9 adc r10, QWORD PTR [r13+16] mov rax, QWORD PTR [rsi+24] mov QWORD PTR [rsi+16], r10 adc rax, QWORD PTR [r13+24] mov r9, QWORD PTR [rsi+32] mov QWORD PTR [rsi+24], rax adc r9, QWORD PTR [r13+32] mov r10, QWORD PTR [rsi+40] mov QWORD PTR [rsi+32], r9 adc r10, QWORD PTR [r13+40] mov rax, QWORD PTR [rsi+48] mov QWORD PTR [rsi+40], r10 adc rax, QWORD PTR [r13+48] mov r9, QWORD PTR [rsi+56] mov QWORD PTR [rsi+48], rax adc r9, QWORD PTR [r13+56] mov r10, QWORD PTR [rsi+64] mov QWORD PTR [rsi+56], r9 adc r10, QWORD PTR [r13+64] mov rax, QWORD PTR [rsi+72] 
mov QWORD PTR [rsi+64], r10 adc rax, QWORD PTR [r13+72] mov r9, QWORD PTR [rsi+80] mov QWORD PTR [rsi+72], rax adc r9, QWORD PTR [r13+80] mov r10, QWORD PTR [rsi+88] mov QWORD PTR [rsi+80], r9 adc r10, QWORD PTR [r13+88] mov rax, QWORD PTR [rsi+96] mov QWORD PTR [rsi+88], r10 adc rax, QWORD PTR [r13+96] mov r9, QWORD PTR [rsi+104] mov QWORD PTR [rsi+96], rax adc r9, QWORD PTR [r13+104] mov r10, QWORD PTR [rsi+112] mov QWORD PTR [rsi+104], r9 adc r10, QWORD PTR [r13+112] mov rax, QWORD PTR [rsi+120] mov QWORD PTR [rsi+112], r10 adc rax, QWORD PTR [r13+120] mov r9, QWORD PTR [rsi+128] mov QWORD PTR [rsi+120], rax adc r9, QWORD PTR [r13+128] mov r10, QWORD PTR [rsi+136] mov QWORD PTR [rsi+128], r9 adc r10, QWORD PTR [r13+136] mov rax, QWORD PTR [rsi+144] mov QWORD PTR [rsi+136], r10 adc rax, QWORD PTR [r13+144] mov r9, QWORD PTR [rsi+152] mov QWORD PTR [rsi+144], rax adc r9, QWORD PTR [r13+152] mov r10, QWORD PTR [rsi+160] mov QWORD PTR [rsi+152], r9 adc r10, QWORD PTR [r13+160] mov rax, QWORD PTR [rsi+168] mov QWORD PTR [rsi+160], r10 adc rax, QWORD PTR [r13+168] mov r9, QWORD PTR [rsi+176] mov QWORD PTR [rsi+168], rax adc r9, QWORD PTR [r13+176] mov r10, QWORD PTR [rsi+184] mov QWORD PTR [rsi+176], r9 adc r10, QWORD PTR [r13+184] mov rax, QWORD PTR [rsi+192] mov QWORD PTR [rsi+184], r10 adc rax, QWORD PTR [r13+192] mov r9, QWORD PTR [rsi+200] mov QWORD PTR [rsi+192], rax adc r9, QWORD PTR [r13+200] mov r10, QWORD PTR [rsi+208] mov QWORD PTR [rsi+200], r9 adc r10, QWORD PTR [r13+208] mov rax, QWORD PTR [rsi+216] mov QWORD PTR [rsi+208], r10 adc rax, QWORD PTR [r13+216] mov r9, QWORD PTR [rsi+224] mov QWORD PTR [rsi+216], rax adc r9, QWORD PTR [r13+224] mov r10, QWORD PTR [rsi+232] mov QWORD PTR [rsi+224], r9 adc r10, QWORD PTR [r13+232] mov rax, QWORD PTR [rsi+240] mov QWORD PTR [rsi+232], r10 adc rax, QWORD PTR [r13+240] mov r9, QWORD PTR [rsi+248] mov QWORD PTR [rsi+240], rax adc r9, QWORD PTR [r13+248] mov r10, QWORD PTR [rsi+256] mov QWORD PTR [rsi+248], r9 adc 
r10, QWORD PTR [r13+256] mov QWORD PTR [rsi+256], r10 ; Add to zero mov rax, QWORD PTR [r13+264] adc rax, 0 mov r9, QWORD PTR [r13+272] mov QWORD PTR [rsi+264], rax adc r9, 0 mov r10, QWORD PTR [r13+280] mov QWORD PTR [rsi+272], r9 adc r10, 0 mov rax, QWORD PTR [r13+288] mov QWORD PTR [rsi+280], r10 adc rax, 0 mov r9, QWORD PTR [r13+296] mov QWORD PTR [rsi+288], rax adc r9, 0 mov r10, QWORD PTR [r13+304] mov QWORD PTR [rsi+296], r9 adc r10, 0 mov rax, QWORD PTR [r13+312] mov QWORD PTR [rsi+304], r10 adc rax, 0 mov r9, QWORD PTR [r13+320] mov QWORD PTR [rsi+312], rax adc r9, 0 mov r10, QWORD PTR [r13+328] mov QWORD PTR [rsi+320], r9 adc r10, 0 mov rax, QWORD PTR [r13+336] mov QWORD PTR [rsi+328], r10 adc rax, 0 mov r9, QWORD PTR [r13+344] mov QWORD PTR [rsi+336], rax adc r9, 0 mov r10, QWORD PTR [r13+352] mov QWORD PTR [rsi+344], r9 adc r10, 0 mov rax, QWORD PTR [r13+360] mov QWORD PTR [rsi+352], r10 adc rax, 0 mov r9, QWORD PTR [r13+368] mov QWORD PTR [rsi+360], rax adc r9, 0 mov r10, QWORD PTR [r13+376] mov QWORD PTR [rsi+368], r9 adc r10, 0 mov rax, QWORD PTR [r13+384] mov QWORD PTR [rsi+376], r10 adc rax, 0 mov r9, QWORD PTR [r13+392] mov QWORD PTR [rsi+384], rax adc r9, 0 mov r10, QWORD PTR [r13+400] mov QWORD PTR [rsi+392], r9 adc r10, 0 mov rax, QWORD PTR [r13+408] mov QWORD PTR [rsi+400], r10 adc rax, 0 mov r9, QWORD PTR [r13+416] mov QWORD PTR [rsi+408], rax adc r9, 0 mov r10, QWORD PTR [r13+424] mov QWORD PTR [rsi+416], r9 adc r10, 0 mov rax, QWORD PTR [r13+432] mov QWORD PTR [rsi+424], r10 adc rax, 0 mov r9, QWORD PTR [r13+440] mov QWORD PTR [rsi+432], rax adc r9, 0 mov r10, QWORD PTR [r13+448] mov QWORD PTR [rsi+440], r9 adc r10, 0 mov rax, QWORD PTR [r13+456] mov QWORD PTR [rsi+448], r10 adc rax, 0 mov r9, QWORD PTR [r13+464] mov QWORD PTR [rsi+456], rax adc r9, 0 mov r10, QWORD PTR [r13+472] mov QWORD PTR [rsi+464], r9 adc r10, 0 mov rax, QWORD PTR [r13+480] mov QWORD PTR [rsi+472], r10 adc rax, 0 mov r9, QWORD PTR [r13+488] mov QWORD PTR [rsi+480], 
rax adc r9, 0 mov r10, QWORD PTR [r13+496] mov QWORD PTR [rsi+488], r9 adc r10, 0 mov rax, QWORD PTR [r13+504] mov QWORD PTR [rsi+496], r10 adc rax, 0 mov QWORD PTR [rsi+504], rax add rsp, 1576 pop rsi pop rdi pop r15 pop r14 pop r13 pop r12 ret sp_4096_mul_avx2_64 ENDP _text ENDS ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * Karatsuba: ah^2, al^2, (al - ah)^2
;  *
;  * r  A single precision integer. Arrives in rcx; accessed here as 128
;  *    qwords (byte offsets 0..1016).
;  * a  A single precision integer. Arrives in rdx; read here as 64
;  *    qwords (byte offsets 0..504).
;  *
;  * With al = a[0..31], ah = a[32..63] and B = 2^2048 the identity used is
;  *   a^2 = al^2 + B * (al^2 + ah^2 - (al - ah)^2) + B^2 * ah^2
;  * (assumes sp_2048_sqr_32(rcx = r, rdx = a) writes the 64-word square of
;  * its 32-word input - that routine is not visible here).
;  *
;  * Stack frame (528 bytes):
;  *   [rsp+0 .. rsp+511]  scratch: |al - ah|, later reused as the
;  *                       destination of the first sp_2048_sqr_32 call
;  *   [rsp+512]           saved r (rcx)
;  *   [rsp+520]           saved a (rdx)
;  *
;  * Registers written directly by this routine: rax, rcx, rdx, r8, r9,
;  * r10, r11 and the flags.
;  *
;  * NOTE(review): 528 is a multiple of 16, so rsp keeps its entry alignment
;  * at every call below; with the usual x64 entry state (rsp = 16n + 8) the
;  * calls are made with rsp not 16-byte aligned, and no separate 32-byte
;  * home area is reserved below the scratch buffer. Presumably acceptable
;  * because sp_2048_sqr_32 is an internal routine - confirm.
;  */
_text SEGMENT READONLY PARA
sp_4096_sqr_64 PROC
    sub rsp, 528
    mov QWORD PTR [rsp+512], rcx
    mov QWORD PTR [rsp+520], rdx
    ; r9 collects the final borrow, r10 -> scratch, r11 -> ah (a + 256 bytes)
    mov r9, 0
    mov r10, rsp
    lea r11, QWORD PTR [rdx+256]
    ; scratch[0..31] = al - ah. One sub/sbb borrow chain; the interleaved
    ; mov instructions do not modify the flags.
    mov rax, QWORD PTR [rdx]
    sub rax, QWORD PTR [r11]
    mov r8, QWORD PTR [rdx+8]
    mov QWORD PTR [r10], rax
    sbb r8, QWORD PTR [r11+8]
    mov rax, QWORD PTR [rdx+16]
    mov QWORD PTR [r10+8], r8
    sbb rax, QWORD PTR [r11+16]
    mov r8, QWORD PTR [rdx+24]
    mov QWORD PTR [r10+16], rax
    sbb r8, QWORD PTR [r11+24]
    mov rax, QWORD PTR [rdx+32]
    mov QWORD PTR [r10+24], r8
    sbb rax, QWORD PTR [r11+32]
    mov r8, QWORD PTR [rdx+40]
    mov QWORD PTR [r10+32], rax
    sbb r8, QWORD PTR [r11+40]
    mov rax, QWORD PTR [rdx+48]
    mov QWORD PTR [r10+40], r8
    sbb rax, QWORD PTR [r11+48]
    mov r8, QWORD PTR [rdx+56]
    mov QWORD PTR [r10+48], rax
    sbb r8, QWORD PTR [r11+56]
    mov rax, QWORD PTR [rdx+64]
    mov QWORD PTR [r10+56], r8
    sbb rax, QWORD PTR [r11+64]
    mov r8, QWORD PTR [rdx+72]
    mov QWORD PTR [r10+64], rax
    sbb r8, QWORD PTR [r11+72]
    mov rax, QWORD PTR [rdx+80]
    mov QWORD PTR [r10+72], r8
    sbb rax, QWORD PTR [r11+80]
    mov r8, QWORD PTR [rdx+88]
    mov QWORD PTR [r10+80], rax
    sbb r8, QWORD PTR [r11+88]
    mov rax, QWORD PTR [rdx+96]
    mov QWORD PTR [r10+88], r8
    sbb rax, QWORD PTR [r11+96]
    mov r8, QWORD PTR [rdx+104]
    mov QWORD PTR [r10+96], rax
    sbb r8, QWORD PTR [r11+104]
    mov rax, QWORD PTR [rdx+112]
    mov QWORD PTR [r10+104], r8
    sbb rax, QWORD PTR [r11+112]
    mov r8, QWORD PTR [rdx+120]
    mov QWORD PTR [r10+112], rax
    sbb r8, QWORD PTR [r11+120]
    mov rax, QWORD PTR [rdx+128]
    mov QWORD PTR [r10+120], r8
    sbb rax, QWORD PTR [r11+128]
    mov r8, QWORD PTR [rdx+136]
    mov QWORD PTR [r10+128], rax
    sbb r8, QWORD PTR [r11+136]
    mov rax, QWORD PTR [rdx+144]
    mov QWORD PTR [r10+136], r8
    sbb rax, QWORD PTR [r11+144]
    mov r8, QWORD PTR [rdx+152]
    mov QWORD PTR [r10+144], rax
    sbb r8, QWORD PTR [r11+152]
    mov rax, QWORD PTR [rdx+160]
    mov QWORD PTR [r10+152], r8
    sbb rax, QWORD PTR [r11+160]
    mov r8, QWORD PTR [rdx+168]
    mov QWORD PTR [r10+160], rax
    sbb r8, QWORD PTR [r11+168]
    mov rax, QWORD PTR [rdx+176]
    mov QWORD PTR [r10+168], r8
    sbb rax, QWORD PTR [r11+176]
    mov r8, QWORD PTR [rdx+184]
    mov QWORD PTR [r10+176], rax
    sbb r8, QWORD PTR [r11+184]
    mov rax, QWORD PTR [rdx+192]
    mov QWORD PTR [r10+184], r8
    sbb rax, QWORD PTR [r11+192]
    mov r8, QWORD PTR [rdx+200]
    mov QWORD PTR [r10+192], rax
    sbb r8, QWORD PTR [r11+200]
    mov rax, QWORD PTR [rdx+208]
    mov QWORD PTR [r10+200], r8
    sbb rax, QWORD PTR [r11+208]
    mov r8, QWORD PTR [rdx+216]
    mov QWORD PTR [r10+208], rax
    sbb r8, QWORD PTR [r11+216]
    mov rax, QWORD PTR [rdx+224]
    mov QWORD PTR [r10+216], r8
    sbb rax, QWORD PTR [r11+224]
    mov r8, QWORD PTR [rdx+232]
    mov QWORD PTR [r10+224], rax
    sbb r8, QWORD PTR [r11+232]
    mov rax, QWORD PTR [rdx+240]
    mov QWORD PTR [r10+232], r8
    sbb rax, QWORD PTR [r11+240]
    mov r8, QWORD PTR [rdx+248]
    mov QWORD PTR [r10+240], rax
    sbb r8, QWORD PTR [r11+248]
    mov QWORD PTR [r10+248], r8
    ; r9 = 0 if al >= ah, all ones if the subtraction borrowed
    sbb r9, 0
    ; Cond Negate
    ; scratch = (scratch XOR r9) - r9: unchanged when r9 = 0, two's
    ; complement negation when r9 is all ones, so scratch = |al - ah|
    ; without a branch. r11 carries the "+1" between words: it is
    ; recomputed after every add with setc (mov does not touch flags).
    mov rax, QWORD PTR [r10]
    mov r11, r9
    xor rax, r9
    neg r11
    sub rax, r9
    mov r8, QWORD PTR [r10+8]
    ; r11 = carry out of the low word's +1 (always 0 when r9 = 0)
    sbb r11, 0
    mov QWORD PTR [r10], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+16]
    setc r11b
    mov QWORD PTR [r10+8], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+24]
    setc r11b
    mov QWORD PTR [r10+16], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+32]
    setc r11b
    mov QWORD PTR [r10+24], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+40]
    setc r11b
    mov QWORD PTR [r10+32], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+48]
    setc r11b
    mov QWORD PTR [r10+40], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+56]
    setc r11b
    mov QWORD PTR [r10+48], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+64]
    setc r11b
    mov QWORD PTR [r10+56], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+72]
    setc r11b
    mov QWORD PTR [r10+64], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+80]
    setc r11b
    mov QWORD PTR [r10+72], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+88]
    setc r11b
    mov QWORD PTR [r10+80], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+96]
    setc r11b
    mov QWORD PTR [r10+88], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+104]
    setc r11b
    mov QWORD PTR [r10+96], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+112]
    setc r11b
    mov QWORD PTR [r10+104], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+120]
    setc r11b
    mov QWORD PTR [r10+112], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+128]
    setc r11b
    mov QWORD PTR [r10+120], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+136]
    setc r11b
    mov QWORD PTR [r10+128], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+144]
    setc r11b
    mov QWORD PTR [r10+136], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+152]
    setc r11b
    mov QWORD PTR [r10+144], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+160]
    setc r11b
    mov QWORD PTR [r10+152], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+168]
    setc r11b
    mov QWORD PTR [r10+160], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+176]
    setc r11b
    mov QWORD PTR [r10+168], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+184]
    setc r11b
    mov QWORD PTR [r10+176], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+192]
    setc r11b
    mov QWORD PTR [r10+184], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+200]
    setc r11b
    mov QWORD PTR [r10+192], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+208]
    setc r11b
    mov QWORD PTR [r10+200], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+216]
    setc r11b
    mov QWORD PTR [r10+208], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+224]
    setc r11b
    mov QWORD PTR [r10+216], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+232]
    setc r11b
    mov QWORD PTR [r10+224], rax
    xor r8, r9
    add r8, r11
    mov rax, QWORD PTR [r10+240]
    setc r11b
    mov QWORD PTR [r10+232], r8
    xor rax, r9
    add rax, r11
    mov r8, QWORD PTR [r10+248]
    setc r11b
    mov QWORD PTR [r10+240], rax
    xor r8, r9
    add r8, r11
    mov QWORD PTR [r10+248], r8
    ; First call: source and destination are both the scratch buffer
    ; (rdx = rcx = rsp), i.e. |al - ah| is squared in place.
    mov rdx, r10
    mov rcx, rsp
    call sp_2048_sqr_32
    ; Second call: source a + 256 bytes (ah), destination r + 512 bytes
    mov rdx, QWORD PTR [rsp+520]
    mov rcx, QWORD PTR [rsp+512]
    add rdx, 256
    add rcx, 512
    call sp_2048_sqr_32
    ; Third call: source a (al), destination r
    mov rdx, QWORD PTR [rsp+520]
    mov rcx, QWORD PTR [rsp+512]
    call sp_2048_sqr_32
    ; NOTE(review): the two loads inside this IFDEF have no visible effect
    ; in this routine - rdx is overwritten by the very next instruction and
    ; rcx is reloaded from [rsp+512] before its next use.
IFDEF _WIN64
    mov rdx, QWORD PTR [rsp+520]
    mov rcx, QWORD PTR [rsp+512]
ENDIF
    ; r10 -> middle of scratch, rdx -> r + 768 bytes; the -256..+248
    ; displacements below therefore cover scratch[0..63] and r[64..127].
    mov rdx, QWORD PTR [rsp+512]
    lea r10, QWORD PTR [rsp+256]
    add rdx, 768
    mov r9, 0
    ; scratch[0..63] -= r[64..127]; borrow accumulated in r9
    mov r8, QWORD PTR [r10+-256]
    sub r8, QWORD PTR [rdx+-256]
    mov rax, QWORD PTR [r10+-248]
    mov QWORD PTR [r10+-256], r8
    sbb rax, QWORD PTR [rdx+-248]
    mov r8, QWORD PTR [r10+-240]
    mov QWORD PTR [r10+-248], rax
    sbb r8, QWORD PTR [rdx+-240]
    mov rax, QWORD PTR [r10+-232]
    mov QWORD PTR [r10+-240], r8
    sbb rax, QWORD PTR [rdx+-232]
    mov r8, QWORD PTR [r10+-224]
    mov QWORD PTR [r10+-232], rax
    sbb r8, QWORD PTR [rdx+-224]
    mov rax, QWORD PTR [r10+-216]
    mov QWORD PTR [r10+-224], r8
    sbb rax, QWORD PTR [rdx+-216]
    mov r8, QWORD PTR [r10+-208]
    mov QWORD PTR [r10+-216], rax
    sbb r8, QWORD PTR [rdx+-208]
    mov rax, QWORD PTR [r10+-200]
    mov QWORD PTR [r10+-208], r8
    sbb rax, QWORD PTR [rdx+-200]
    mov r8, QWORD PTR [r10+-192]
    mov QWORD PTR [r10+-200], rax
    sbb r8, QWORD PTR [rdx+-192]
    mov rax, QWORD PTR [r10+-184]
    mov QWORD PTR [r10+-192], r8
    sbb rax, QWORD PTR [rdx+-184]
    mov r8, QWORD PTR [r10+-176]
    mov QWORD PTR [r10+-184], rax
    sbb r8, QWORD PTR [rdx+-176]
    mov rax, QWORD PTR [r10+-168]
    mov QWORD PTR [r10+-176], r8
    sbb rax, QWORD PTR [rdx+-168]
    mov r8, QWORD PTR [r10+-160]
    mov QWORD PTR [r10+-168], rax
    sbb r8, QWORD PTR [rdx+-160]
    mov rax, QWORD PTR [r10+-152]
    mov QWORD PTR [r10+-160], r8
    sbb rax, QWORD PTR [rdx+-152]
    mov r8, QWORD PTR [r10+-144]
    mov QWORD PTR [r10+-152], rax
    sbb r8, QWORD PTR [rdx+-144]
    mov rax, QWORD PTR [r10+-136]
    mov QWORD PTR [r10+-144], r8
    sbb rax, QWORD PTR [rdx+-136]
    mov r8, QWORD PTR [r10+-128]
    mov QWORD PTR [r10+-136], rax
    sbb r8, QWORD PTR [rdx+-128]
    mov rax, QWORD PTR [r10+-120]
    mov QWORD PTR [r10+-128], r8
    sbb rax, QWORD PTR [rdx+-120]
    mov r8, QWORD PTR [r10+-112]
    mov QWORD PTR [r10+-120], rax
    sbb r8, QWORD PTR [rdx+-112]
    mov rax, QWORD PTR [r10+-104]
    mov QWORD PTR [r10+-112], r8
    sbb rax, QWORD PTR [rdx+-104]
    mov r8, QWORD PTR [r10+-96]
    mov QWORD PTR [r10+-104], rax
    sbb r8, QWORD PTR [rdx+-96]
    mov rax, QWORD PTR [r10+-88]
    mov QWORD PTR [r10+-96], r8
    sbb rax, QWORD PTR [rdx+-88]
    mov r8, QWORD PTR [r10+-80]
    mov QWORD PTR [r10+-88], rax
    sbb r8, QWORD PTR [rdx+-80]
    mov rax, QWORD PTR [r10+-72]
    mov QWORD PTR [r10+-80], r8
    sbb rax, QWORD PTR [rdx+-72]
    mov r8, QWORD PTR [r10+-64]
    mov QWORD PTR [r10+-72], rax
    sbb r8, QWORD PTR [rdx+-64]
    mov rax, QWORD PTR [r10+-56]
    mov QWORD PTR [r10+-64], r8
    sbb rax, QWORD PTR [rdx+-56]
    mov r8, QWORD PTR [r10+-48]
    mov QWORD PTR [r10+-56], rax
    sbb r8, QWORD PTR [rdx+-48]
    mov rax, QWORD PTR [r10+-40]
    mov QWORD PTR [r10+-48], r8
    sbb rax, QWORD PTR [rdx+-40]
    mov r8, QWORD PTR [r10+-32]
    mov QWORD PTR [r10+-40], rax
    sbb r8, QWORD PTR [rdx+-32]
    mov rax, QWORD PTR [r10+-24]
    mov QWORD PTR [r10+-32], r8
    sbb rax, QWORD PTR [rdx+-24]
    mov r8, QWORD PTR [r10+-16]
    mov QWORD PTR [r10+-24], rax
    sbb r8, QWORD PTR [rdx+-16]
    mov rax, QWORD PTR [r10+-8]
    mov QWORD PTR [r10+-16], r8
    sbb rax, QWORD PTR [rdx+-8]
    mov r8, QWORD PTR [r10]
    mov QWORD PTR [r10+-8], rax
    sbb r8, QWORD PTR [rdx]
    mov rax, QWORD PTR [r10+8]
    mov QWORD PTR [r10], r8
    sbb rax, QWORD PTR [rdx+8]
    mov r8, QWORD PTR [r10+16]
    mov QWORD PTR [r10+8], rax
    sbb r8, QWORD PTR [rdx+16]
    mov rax, QWORD PTR [r10+24]
    mov QWORD PTR [r10+16], r8
    sbb rax, QWORD PTR [rdx+24]
    mov r8, QWORD PTR [r10+32]
    mov QWORD PTR [r10+24], rax
    sbb r8, QWORD PTR [rdx+32]
    mov rax, QWORD PTR [r10+40]
    mov QWORD PTR [r10+32], r8
    sbb rax, QWORD PTR [rdx+40]
    mov r8, QWORD PTR [r10+48]
    mov QWORD PTR [r10+40], rax
    sbb r8, QWORD PTR [rdx+48]
    mov rax, QWORD PTR [r10+56]
    mov QWORD PTR [r10+48], r8
    sbb rax, QWORD PTR [rdx+56]
    mov r8, QWORD PTR [r10+64]
    mov QWORD PTR [r10+56], rax
    sbb r8, QWORD PTR [rdx+64]
    mov rax, QWORD PTR [r10+72]
    mov QWORD PTR [r10+64], r8
    sbb rax, QWORD PTR [rdx+72]
    mov r8, QWORD PTR [r10+80]
    mov QWORD PTR [r10+72], rax
    sbb r8, QWORD PTR [rdx+80]
    mov rax, QWORD PTR [r10+88]
    mov QWORD PTR [r10+80], r8
    sbb rax, QWORD PTR [rdx+88]
    mov r8, QWORD PTR [r10+96]
    mov QWORD PTR [r10+88], rax
    sbb r8, QWORD PTR [rdx+96]
    mov rax, QWORD PTR [r10+104]
    mov QWORD PTR [r10+96], r8
    sbb rax, QWORD PTR [rdx+104]
    mov r8, QWORD PTR [r10+112]
    mov QWORD PTR [r10+104], rax
    sbb r8, QWORD PTR [rdx+112]
    mov rax, QWORD PTR [r10+120]
    mov QWORD PTR [r10+112], r8
    sbb rax, QWORD PTR [rdx+120]
    mov r8, QWORD PTR [r10+128]
    mov QWORD PTR [r10+120], rax
    sbb r8, QWORD PTR [rdx+128]
    mov rax, QWORD PTR [r10+136]
    mov QWORD PTR [r10+128], r8
    sbb rax, QWORD PTR [rdx+136]
    mov r8, QWORD PTR [r10+144]
    mov QWORD PTR [r10+136], rax
    sbb r8, QWORD PTR [rdx+144]
    mov rax, QWORD PTR [r10+152]
    mov QWORD PTR [r10+144], r8
    sbb rax, QWORD PTR [rdx+152]
    mov r8, QWORD PTR [r10+160]
    mov QWORD PTR [r10+152], rax
    sbb r8, QWORD PTR [rdx+160]
    mov rax, QWORD PTR [r10+168]
    mov QWORD PTR [r10+160], r8
    sbb rax, QWORD PTR [rdx+168]
    mov r8, QWORD PTR [r10+176]
    mov QWORD PTR [r10+168], rax
    sbb r8, QWORD PTR [rdx+176]
    mov rax, QWORD PTR [r10+184]
    mov QWORD PTR [r10+176], r8
    sbb rax, QWORD PTR [rdx+184]
    mov r8, QWORD PTR [r10+192]
    mov QWORD PTR [r10+184], rax
    sbb r8, QWORD PTR [rdx+192]
    mov rax, QWORD PTR [r10+200]
    mov QWORD PTR [r10+192], r8
    sbb rax, QWORD PTR [rdx+200]
    mov r8, QWORD PTR [r10+208]
    mov QWORD PTR [r10+200], rax
    sbb r8, QWORD PTR [rdx+208]
    mov rax, QWORD PTR [r10+216]
    mov QWORD PTR [r10+208], r8
    sbb rax, QWORD PTR [rdx+216]
    mov r8, QWORD PTR [r10+224]
    mov QWORD PTR [r10+216], rax
    sbb r8, QWORD PTR [rdx+224]
    mov rax, QWORD PTR [r10+232]
    mov QWORD PTR [r10+224], r8
    sbb rax, QWORD PTR [rdx+232]
    mov r8, QWORD PTR [r10+240]
    mov QWORD PTR [r10+232], rax
    sbb r8, QWORD PTR [rdx+240]
    mov rax, QWORD PTR [r10+248]
    mov QWORD PTR [r10+240], r8
    sbb rax, QWORD PTR [rdx+248]
    mov QWORD PTR [r10+248], rax
    sbb r9, 0
    ; rdx -> r + 256 bytes, so the same displacements now cover r[0..63]
    sub rdx, 512
    ; scratch[0..63] -= r[0..63]; r9 goes down by one more on borrow
    mov r8, QWORD PTR [r10+-256]
    sub r8, QWORD PTR [rdx+-256]
    mov rax, QWORD PTR [r10+-248]
    mov QWORD PTR [r10+-256], r8
    sbb rax, QWORD PTR [rdx+-248]
    mov r8, QWORD PTR [r10+-240]
    mov QWORD PTR [r10+-248], rax
    sbb r8, QWORD PTR [rdx+-240]
    mov rax, QWORD PTR [r10+-232]
    mov QWORD PTR [r10+-240], r8
    sbb rax, QWORD PTR [rdx+-232]
    mov r8, QWORD PTR [r10+-224]
    mov QWORD PTR [r10+-232], rax
    sbb r8, QWORD PTR [rdx+-224]
    mov rax, QWORD PTR [r10+-216]
    mov QWORD PTR [r10+-224], r8
    sbb rax, QWORD PTR [rdx+-216]
    mov r8, QWORD PTR [r10+-208]
    mov QWORD PTR [r10+-216], rax
    sbb r8, QWORD PTR [rdx+-208]
    mov rax, QWORD PTR [r10+-200]
    mov QWORD PTR [r10+-208], r8
    sbb rax, QWORD PTR [rdx+-200]
    mov r8, QWORD PTR [r10+-192]
    mov QWORD PTR [r10+-200], rax
    sbb r8, QWORD PTR [rdx+-192]
    mov rax, QWORD PTR [r10+-184]
    mov QWORD PTR [r10+-192], r8
    sbb rax, QWORD PTR [rdx+-184]
    mov r8, QWORD PTR [r10+-176]
    mov QWORD PTR [r10+-184], rax
    sbb r8, QWORD PTR [rdx+-176]
    mov rax, QWORD PTR [r10+-168]
    mov QWORD PTR [r10+-176], r8
    sbb rax, QWORD PTR [rdx+-168]
    mov r8, QWORD PTR [r10+-160]
    mov QWORD PTR [r10+-168], rax
    sbb r8, QWORD PTR [rdx+-160]
    mov rax, QWORD PTR [r10+-152]
    mov QWORD PTR [r10+-160], r8
    sbb rax, QWORD PTR [rdx+-152]
    mov r8, QWORD PTR [r10+-144]
    mov QWORD PTR [r10+-152], rax
    sbb r8, QWORD PTR [rdx+-144]
    mov rax, QWORD PTR [r10+-136]
    mov QWORD PTR [r10+-144], r8
    sbb rax, QWORD PTR [rdx+-136]
    mov r8, QWORD PTR [r10+-128]
    mov QWORD PTR [r10+-136], rax
    sbb r8, QWORD PTR [rdx+-128]
    mov rax, QWORD PTR [r10+-120]
    mov QWORD PTR [r10+-128], r8
    sbb rax, QWORD PTR [rdx+-120]
    mov r8, QWORD PTR [r10+-112]
    mov QWORD PTR [r10+-120], rax
    sbb r8, QWORD PTR [rdx+-112]
    mov rax, QWORD PTR [r10+-104]
    mov QWORD PTR [r10+-112], r8
    sbb rax, QWORD PTR [rdx+-104]
    mov r8, QWORD PTR [r10+-96]
    mov QWORD PTR [r10+-104], rax
    sbb r8, QWORD PTR [rdx+-96]
    mov rax, QWORD PTR [r10+-88]
    mov QWORD PTR [r10+-96], r8
    sbb rax, QWORD PTR [rdx+-88]
    mov r8, QWORD PTR [r10+-80]
    mov QWORD PTR [r10+-88], rax
    sbb r8, QWORD PTR [rdx+-80]
    mov rax, QWORD PTR [r10+-72]
    mov QWORD PTR [r10+-80], r8
    sbb rax, QWORD PTR [rdx+-72]
    mov r8, QWORD PTR [r10+-64]
    mov QWORD PTR [r10+-72], rax
    sbb r8, QWORD PTR [rdx+-64]
    mov rax, QWORD PTR [r10+-56]
    mov QWORD PTR [r10+-64], r8
    sbb rax, QWORD PTR [rdx+-56]
    mov r8, QWORD PTR [r10+-48]
    mov QWORD PTR [r10+-56], rax
    sbb r8, QWORD PTR [rdx+-48]
    mov rax, QWORD PTR [r10+-40]
    mov QWORD PTR [r10+-48], r8
    sbb rax, QWORD PTR [rdx+-40]
    mov r8, QWORD PTR [r10+-32]
    mov QWORD PTR [r10+-40], rax
    sbb r8, QWORD PTR [rdx+-32]
    mov rax, QWORD PTR [r10+-24]
    mov QWORD PTR [r10+-32], r8
    sbb rax, QWORD PTR [rdx+-24]
    mov r8, QWORD PTR [r10+-16]
    mov QWORD PTR [r10+-24], rax
    sbb r8, QWORD PTR [rdx+-16]
    mov rax, QWORD PTR [r10+-8]
    mov QWORD PTR [r10+-16], r8
    sbb rax, QWORD PTR [rdx+-8]
    mov r8, QWORD PTR [r10]
    mov QWORD PTR [r10+-8], rax
    sbb r8, QWORD PTR [rdx]
    mov rax, QWORD PTR [r10+8]
    mov QWORD PTR [r10], r8
    sbb rax, QWORD PTR [rdx+8]
    mov r8, QWORD PTR [r10+16]
    mov QWORD PTR [r10+8], rax
    sbb r8, QWORD PTR [rdx+16]
    mov rax, QWORD PTR [r10+24]
    mov QWORD PTR [r10+16], r8
    sbb rax, QWORD PTR [rdx+24]
    mov r8, QWORD PTR [r10+32]
    mov QWORD PTR [r10+24], rax
    sbb r8, QWORD PTR [rdx+32]
    mov rax, QWORD PTR [r10+40]
    mov QWORD PTR [r10+32], r8
    sbb rax, QWORD PTR [rdx+40]
    mov r8, QWORD PTR [r10+48]
    mov QWORD PTR [r10+40], rax
    sbb r8, QWORD PTR [rdx+48]
    mov rax, QWORD PTR [r10+56]
    mov QWORD PTR [r10+48], r8
    sbb rax, QWORD PTR [rdx+56]
    mov r8, QWORD PTR [r10+64]
    mov QWORD PTR [r10+56], rax
    sbb r8, QWORD PTR [rdx+64]
    mov rax, QWORD PTR [r10+72]
    mov QWORD PTR [r10+64], r8
    sbb rax, QWORD PTR [rdx+72]
    mov r8, QWORD PTR [r10+80]
    mov QWORD PTR [r10+72], rax
    sbb r8, QWORD PTR [rdx+80]
    mov rax, QWORD PTR [r10+88]
    mov QWORD PTR [r10+80], r8
    sbb rax, QWORD PTR [rdx+88]
    mov r8, QWORD PTR [r10+96]
    mov QWORD PTR [r10+88], rax
    sbb r8, QWORD PTR [rdx+96]
    mov rax, QWORD PTR [r10+104]
    mov QWORD PTR [r10+96], r8
    sbb rax, QWORD PTR [rdx+104]
    mov r8, QWORD PTR [r10+112]
    mov QWORD PTR [r10+104], rax
    sbb r8, QWORD PTR [rdx+112]
    mov rax, QWORD PTR [r10+120]
    mov QWORD PTR [r10+112], r8
    sbb rax, QWORD PTR [rdx+120]
    mov r8, QWORD PTR [r10+128]
    mov QWORD PTR [r10+120], rax
    sbb r8, QWORD PTR [rdx+128]
    mov rax, QWORD PTR [r10+136]
    mov QWORD PTR [r10+128], r8
    sbb rax, QWORD PTR [rdx+136]
    mov r8, QWORD PTR [r10+144]
    mov QWORD PTR [r10+136], rax
    sbb r8, QWORD PTR [rdx+144]
    mov rax, QWORD PTR [r10+152]
    mov QWORD PTR [r10+144], r8
    sbb rax, QWORD PTR [rdx+152]
    mov r8, QWORD PTR [r10+160]
    mov QWORD PTR [r10+152], rax
    sbb r8, QWORD PTR [rdx+160]
    mov rax, QWORD PTR [r10+168]
    mov QWORD PTR [r10+160], r8
    sbb rax, QWORD PTR [rdx+168]
    mov r8, QWORD PTR [r10+176]
    mov QWORD PTR [r10+168], rax
    sbb r8, QWORD PTR [rdx+176]
    mov rax, QWORD PTR [r10+184]
    mov QWORD PTR [r10+176], r8
    sbb rax, QWORD PTR [rdx+184]
    mov r8, QWORD PTR [r10+192]
    mov QWORD PTR [r10+184], rax
    sbb r8, QWORD PTR [rdx+192]
    mov rax, QWORD PTR [r10+200]
    mov QWORD PTR [r10+192], r8
    sbb rax, QWORD PTR [rdx+200]
    mov r8, QWORD PTR [r10+208]
    mov QWORD PTR [r10+200], rax
    sbb r8, QWORD PTR [rdx+208]
    mov rax, QWORD PTR [r10+216]
    mov QWORD PTR [r10+208], r8
    sbb rax, QWORD PTR [rdx+216]
    mov r8, QWORD PTR [r10+224]
    mov QWORD PTR [r10+216], rax
    sbb r8, QWORD PTR [rdx+224]
    mov rax, QWORD PTR [r10+232]
    mov QWORD PTR [r10+224], r8
    sbb rax, QWORD PTR [rdx+232]
    mov r8, QWORD PTR [r10+240]
    mov QWORD PTR [r10+232], rax
    sbb r8, QWORD PTR [rdx+240]
    mov rax, QWORD PTR [r10+248]
    mov QWORD PTR [r10+240], r8
    sbb rax, QWORD PTR [rdx+248]
    mov QWORD PTR [r10+248], rax
    sbb r9, 0
    ; r9 = -(number of borrows so far); neg turns it into a positive count.
    ; rcx -> r + 512 bytes, so -256..+248 covers r[32..95].
    mov rcx, QWORD PTR [rsp+512]
    neg r9
    add rcx, 512
    ; r[32..95] -= scratch[0..63]; a borrow here lowers r9 by one
    mov r8, QWORD PTR [rcx+-256]
    sub r8, QWORD PTR [r10+-256]
    mov rax, QWORD PTR [rcx+-248]
    mov QWORD PTR [rcx+-256], r8
    sbb rax, QWORD PTR [r10+-248]
    mov r8, QWORD PTR [rcx+-240]
    mov QWORD PTR [rcx+-248], rax
    sbb r8, QWORD PTR [r10+-240]
    mov rax, QWORD PTR [rcx+-232]
    mov QWORD PTR [rcx+-240], r8
    sbb rax, QWORD PTR [r10+-232]
    mov r8, QWORD PTR [rcx+-224]
    mov QWORD PTR [rcx+-232], rax
    sbb r8, QWORD PTR [r10+-224]
    mov rax, QWORD PTR [rcx+-216]
    mov QWORD PTR [rcx+-224], r8
    sbb rax, QWORD PTR [r10+-216]
    mov r8, QWORD PTR [rcx+-208]
    mov QWORD PTR [rcx+-216], rax
    sbb r8, QWORD PTR [r10+-208]
    mov rax, QWORD PTR [rcx+-200]
    mov QWORD PTR [rcx+-208], r8
    sbb rax, QWORD PTR [r10+-200]
    mov r8, QWORD PTR [rcx+-192]
    mov QWORD PTR [rcx+-200], rax
    sbb r8, QWORD PTR [r10+-192]
    mov rax, QWORD PTR [rcx+-184]
    mov QWORD PTR [rcx+-192], r8
    sbb rax, QWORD PTR [r10+-184]
    mov r8, QWORD PTR [rcx+-176]
    mov QWORD PTR [rcx+-184], rax
    sbb r8, QWORD PTR [r10+-176]
    mov rax, QWORD PTR [rcx+-168]
    mov QWORD PTR [rcx+-176], r8
    sbb rax, QWORD PTR [r10+-168]
    mov r8, QWORD PTR [rcx+-160]
    mov QWORD PTR [rcx+-168], rax
    sbb r8, QWORD PTR [r10+-160]
    mov rax, QWORD PTR [rcx+-152]
    mov QWORD PTR [rcx+-160], r8
    sbb rax, QWORD PTR [r10+-152]
    mov r8, QWORD PTR [rcx+-144]
    mov QWORD PTR [rcx+-152], rax
    sbb r8, QWORD PTR [r10+-144]
    mov rax, QWORD PTR [rcx+-136]
    mov QWORD PTR [rcx+-144], r8
    sbb rax, QWORD PTR [r10+-136]
    mov r8, QWORD PTR [rcx+-128]
    mov QWORD PTR [rcx+-136], rax
    sbb r8, QWORD PTR [r10+-128]
    mov rax, QWORD PTR [rcx+-120]
    mov QWORD PTR [rcx+-128], r8
    sbb rax, QWORD PTR [r10+-120]
    mov r8, QWORD PTR [rcx+-112]
    mov QWORD PTR [rcx+-120], rax
    sbb r8, QWORD PTR [r10+-112]
    mov rax, QWORD PTR [rcx+-104]
    mov QWORD PTR [rcx+-112], r8
    sbb rax, QWORD PTR [r10+-104]
    mov r8, QWORD PTR [rcx+-96]
    mov QWORD PTR [rcx+-104], rax
    sbb r8, QWORD PTR [r10+-96]
    mov rax, QWORD PTR [rcx+-88]
    mov QWORD PTR [rcx+-96], r8
    sbb rax, QWORD PTR [r10+-88]
    mov r8, QWORD PTR [rcx+-80]
    mov QWORD PTR [rcx+-88], rax
    sbb r8, QWORD PTR [r10+-80]
    mov rax, QWORD PTR [rcx+-72]
    mov QWORD PTR [rcx+-80], r8
    sbb rax, QWORD PTR [r10+-72]
    mov r8, QWORD PTR [rcx+-64]
    mov QWORD PTR [rcx+-72], rax
    sbb r8, QWORD PTR [r10+-64]
    mov rax, QWORD PTR [rcx+-56]
    mov QWORD PTR [rcx+-64], r8
    sbb rax, QWORD PTR [r10+-56]
    mov r8, QWORD PTR [rcx+-48]
    mov QWORD PTR [rcx+-56], rax
    sbb r8, QWORD PTR [r10+-48]
    mov rax, QWORD PTR [rcx+-40]
    mov QWORD PTR [rcx+-48], r8
    sbb rax, QWORD PTR [r10+-40]
    mov r8, QWORD PTR [rcx+-32]
    mov QWORD PTR [rcx+-40], rax
    sbb r8, QWORD PTR [r10+-32]
    mov rax, QWORD PTR [rcx+-24]
    mov QWORD PTR [rcx+-32], r8
    sbb rax, QWORD PTR [r10+-24]
    mov r8, QWORD PTR [rcx+-16]
    mov QWORD PTR [rcx+-24], rax
    sbb r8, QWORD PTR [r10+-16]
    mov rax, QWORD PTR [rcx+-8]
    mov QWORD PTR [rcx+-16], r8
    sbb rax, QWORD PTR [r10+-8]
    mov r8, QWORD PTR [rcx]
    mov QWORD PTR [rcx+-8], rax
    sbb r8, QWORD PTR [r10]
    mov rax, QWORD PTR [rcx+8]
    mov QWORD PTR [rcx], r8
    sbb rax, QWORD PTR [r10+8]
    mov r8, QWORD PTR [rcx+16]
    mov QWORD PTR [rcx+8], rax
    sbb r8, QWORD PTR [r10+16]
    mov rax, QWORD PTR [rcx+24]
    mov QWORD PTR [rcx+16], r8
    sbb rax, QWORD PTR [r10+24]
    mov r8, QWORD PTR [rcx+32]
    mov QWORD PTR [rcx+24], rax
    sbb r8, QWORD PTR [r10+32]
    mov rax, QWORD PTR [rcx+40]
    mov QWORD PTR [rcx+32], r8
    sbb rax, QWORD PTR [r10+40]
    mov r8, QWORD PTR [rcx+48]
    mov QWORD PTR [rcx+40], rax
    sbb r8, QWORD PTR [r10+48]
    mov rax, QWORD PTR [rcx+56]
    mov QWORD PTR [rcx+48], r8
    sbb rax, QWORD PTR [r10+56]
    mov r8, QWORD PTR [rcx+64]
    mov QWORD PTR [rcx+56], rax
    sbb r8, QWORD PTR [r10+64]
    mov rax, QWORD PTR [rcx+72]
    mov QWORD PTR [rcx+64], r8
    sbb rax, QWORD PTR [r10+72]
    mov r8, QWORD PTR [rcx+80]
    mov QWORD PTR [rcx+72], rax
    sbb r8, QWORD PTR [r10+80]
    mov rax, QWORD PTR [rcx+88]
    mov QWORD PTR [rcx+80], r8
    sbb rax, QWORD PTR [r10+88]
    mov r8, QWORD PTR [rcx+96]
    mov QWORD PTR [rcx+88], rax
    sbb r8, QWORD PTR [r10+96]
    mov rax, QWORD PTR [rcx+104]
    mov QWORD PTR [rcx+96], r8
    sbb rax, QWORD PTR [r10+104]
    mov r8, QWORD PTR [rcx+112]
    mov QWORD PTR [rcx+104], rax
    sbb r8, QWORD PTR [r10+112]
    mov rax, QWORD PTR [rcx+120]
    mov QWORD PTR [rcx+112], r8
    sbb rax, QWORD PTR [r10+120]
    mov r8, QWORD PTR [rcx+128]
    mov QWORD PTR [rcx+120], rax
    sbb r8, QWORD PTR [r10+128]
    mov rax, QWORD PTR [rcx+136]
    mov QWORD PTR [rcx+128], r8
    sbb rax, QWORD PTR [r10+136]
    mov r8, QWORD PTR [rcx+144]
    mov QWORD PTR [rcx+136], rax
    sbb r8, QWORD PTR [r10+144]
    mov rax, QWORD PTR [rcx+152]
    mov QWORD PTR [rcx+144], r8
    sbb rax, QWORD PTR [r10+152]
    mov r8, QWORD PTR [rcx+160]
    mov QWORD PTR [rcx+152], rax
    sbb r8, QWORD PTR [r10+160]
    mov rax, QWORD PTR [rcx+168]
    mov QWORD PTR [rcx+160], r8
    sbb rax, QWORD PTR [r10+168]
    mov r8, QWORD PTR [rcx+176]
    mov QWORD PTR [rcx+168], rax
    sbb r8, QWORD PTR [r10+176]
    mov rax, QWORD PTR [rcx+184]
    mov QWORD PTR [rcx+176], r8
    sbb rax, QWORD PTR [r10+184]
    mov r8, QWORD PTR [rcx+192]
    mov QWORD PTR [rcx+184], rax
    sbb r8, QWORD PTR [r10+192]
    mov rax, QWORD PTR [rcx+200]
    mov QWORD PTR [rcx+192], r8
    sbb rax, QWORD PTR [r10+200]
    mov r8, QWORD PTR [rcx+208]
    mov QWORD PTR [rcx+200], rax
    sbb r8, QWORD PTR [r10+208]
    mov rax, QWORD PTR [rcx+216]
    mov QWORD PTR [rcx+208], r8
    sbb rax, QWORD PTR [r10+216]
    mov r8, QWORD PTR [rcx+224]
    mov QWORD PTR [rcx+216], rax
    sbb r8, QWORD PTR [r10+224]
    mov rax, QWORD PTR [rcx+232]
    mov QWORD PTR [rcx+224], r8
    sbb rax, QWORD PTR [r10+232]
    mov r8, QWORD PTR [rcx+240]
    mov QWORD PTR [rcx+232], rax
    sbb r8, QWORD PTR [r10+240]
    mov rax, QWORD PTR [rcx+248]
    mov QWORD PTR [rcx+240], r8
    sbb rax, QWORD PTR [r10+248]
    mov QWORD PTR [rcx+248], rax
    sbb r9, 0
    ; rcx -> r + 768 bytes (word 96): the net carry in r9 is added there
    ; and rippled through r[96..127] with an adc chain.
    mov rcx, QWORD PTR [rsp+512]
    add rcx, 768
    ; Add in word
    mov r8, QWORD PTR [rcx]
    add r8, r9
    mov rax, QWORD PTR [rcx+8]
    mov QWORD PTR [rcx], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+16]
    mov QWORD PTR [rcx+8], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+24]
    mov QWORD PTR [rcx+16], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+32]
    mov QWORD PTR [rcx+24], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+40]
    mov QWORD PTR [rcx+32], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+48]
    mov QWORD PTR [rcx+40], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+56]
    mov QWORD PTR [rcx+48], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+64]
    mov QWORD PTR [rcx+56], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+72]
    mov QWORD PTR [rcx+64], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+80]
    mov QWORD PTR [rcx+72], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+88]
    mov QWORD PTR [rcx+80], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+96]
    mov QWORD PTR [rcx+88], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+104]
    mov QWORD PTR [rcx+96], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+112]
    mov QWORD PTR [rcx+104], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+120]
    mov QWORD PTR [rcx+112], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+128]
    mov QWORD PTR [rcx+120], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+136]
    mov QWORD PTR [rcx+128], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+144]
    mov QWORD PTR [rcx+136], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+152]
    mov QWORD PTR [rcx+144], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+160]
    mov QWORD PTR [rcx+152], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+168]
    mov QWORD PTR [rcx+160], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+176]
    mov QWORD PTR [rcx+168], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+184]
    mov QWORD PTR [rcx+176], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+192]
    mov QWORD PTR [rcx+184], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+200]
    mov QWORD PTR [rcx+192], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+208]
    mov QWORD PTR [rcx+200], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+216]
    mov QWORD PTR [rcx+208], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+224]
    mov QWORD PTR [rcx+216], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+232]
    mov QWORD PTR [rcx+224], r8
    adc rax, 0
    mov r8, QWORD PTR [rcx+240]
    mov QWORD PTR [rcx+232], rax
    adc r8, 0
    mov rax, QWORD PTR [rcx+248]
    mov QWORD PTR [rcx+240], r8
    adc rax, 0
    mov QWORD PTR [rcx+248], rax
    ; Reload the original a / r pointers into rdx / rcx, then release the
    ; 528-byte frame.
    mov rdx, QWORD PTR [rsp+520]
    mov rcx, QWORD PTR [rsp+512]
    add rsp, 528
    ret
sp_4096_sqr_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * Karatsuba: ah^2, al^2, (al - ah)^2 ; * ; * r A single precision integer. ; * a A single precision integer. 
; */ _text SEGMENT READONLY PARA sp_4096_sqr_avx2_64 PROC sub rsp, 528 mov QWORD PTR [rsp+512], rcx mov QWORD PTR [rsp+520], rdx mov r9, 0 mov r10, rsp lea r11, QWORD PTR [rdx+256] mov rax, QWORD PTR [rdx] sub rax, QWORD PTR [r11] mov r8, QWORD PTR [rdx+8] mov QWORD PTR [r10], rax sbb r8, QWORD PTR [r11+8] mov rax, QWORD PTR [rdx+16] mov QWORD PTR [r10+8], r8 sbb rax, QWORD PTR [r11+16] mov r8, QWORD PTR [rdx+24] mov QWORD PTR [r10+16], rax sbb r8, QWORD PTR [r11+24] mov rax, QWORD PTR [rdx+32] mov QWORD PTR [r10+24], r8 sbb rax, QWORD PTR [r11+32] mov r8, QWORD PTR [rdx+40] mov QWORD PTR [r10+32], rax sbb r8, QWORD PTR [r11+40] mov rax, QWORD PTR [rdx+48] mov QWORD PTR [r10+40], r8 sbb rax, QWORD PTR [r11+48] mov r8, QWORD PTR [rdx+56] mov QWORD PTR [r10+48], rax sbb r8, QWORD PTR [r11+56] mov rax, QWORD PTR [rdx+64] mov QWORD PTR [r10+56], r8 sbb rax, QWORD PTR [r11+64] mov r8, QWORD PTR [rdx+72] mov QWORD PTR [r10+64], rax sbb r8, QWORD PTR [r11+72] mov rax, QWORD PTR [rdx+80] mov QWORD PTR [r10+72], r8 sbb rax, QWORD PTR [r11+80] mov r8, QWORD PTR [rdx+88] mov QWORD PTR [r10+80], rax sbb r8, QWORD PTR [r11+88] mov rax, QWORD PTR [rdx+96] mov QWORD PTR [r10+88], r8 sbb rax, QWORD PTR [r11+96] mov r8, QWORD PTR [rdx+104] mov QWORD PTR [r10+96], rax sbb r8, QWORD PTR [r11+104] mov rax, QWORD PTR [rdx+112] mov QWORD PTR [r10+104], r8 sbb rax, QWORD PTR [r11+112] mov r8, QWORD PTR [rdx+120] mov QWORD PTR [r10+112], rax sbb r8, QWORD PTR [r11+120] mov rax, QWORD PTR [rdx+128] mov QWORD PTR [r10+120], r8 sbb rax, QWORD PTR [r11+128] mov r8, QWORD PTR [rdx+136] mov QWORD PTR [r10+128], rax sbb r8, QWORD PTR [r11+136] mov rax, QWORD PTR [rdx+144] mov QWORD PTR [r10+136], r8 sbb rax, QWORD PTR [r11+144] mov r8, QWORD PTR [rdx+152] mov QWORD PTR [r10+144], rax sbb r8, QWORD PTR [r11+152] mov rax, QWORD PTR [rdx+160] mov QWORD PTR [r10+152], r8 sbb rax, QWORD PTR [r11+160] mov r8, QWORD PTR [rdx+168] mov QWORD PTR [r10+160], rax sbb r8, QWORD PTR [r11+168] mov rax, QWORD 
PTR [rdx+176] mov QWORD PTR [r10+168], r8 sbb rax, QWORD PTR [r11+176] mov r8, QWORD PTR [rdx+184] mov QWORD PTR [r10+176], rax sbb r8, QWORD PTR [r11+184] mov rax, QWORD PTR [rdx+192] mov QWORD PTR [r10+184], r8 sbb rax, QWORD PTR [r11+192] mov r8, QWORD PTR [rdx+200] mov QWORD PTR [r10+192], rax sbb r8, QWORD PTR [r11+200] mov rax, QWORD PTR [rdx+208] mov QWORD PTR [r10+200], r8 sbb rax, QWORD PTR [r11+208] mov r8, QWORD PTR [rdx+216] mov QWORD PTR [r10+208], rax sbb r8, QWORD PTR [r11+216] mov rax, QWORD PTR [rdx+224] mov QWORD PTR [r10+216], r8 sbb rax, QWORD PTR [r11+224] mov r8, QWORD PTR [rdx+232] mov QWORD PTR [r10+224], rax sbb r8, QWORD PTR [r11+232] mov rax, QWORD PTR [rdx+240] mov QWORD PTR [r10+232], r8 sbb rax, QWORD PTR [r11+240] mov r8, QWORD PTR [rdx+248] mov QWORD PTR [r10+240], rax sbb r8, QWORD PTR [r11+248] mov QWORD PTR [r10+248], r8 sbb r9, 0 ; Cond Negate mov rax, QWORD PTR [r10] mov r11, r9 xor rax, r9 neg r11 sub rax, r9 mov r8, QWORD PTR [r10+8] sbb r11, 0 mov QWORD PTR [r10], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+16] setc r11b mov QWORD PTR [r10+8], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+24] setc r11b mov QWORD PTR [r10+16], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+32] setc r11b mov QWORD PTR [r10+24], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+40] setc r11b mov QWORD PTR [r10+32], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+48] setc r11b mov QWORD PTR [r10+40], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+56] setc r11b mov QWORD PTR [r10+48], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+64] setc r11b mov QWORD PTR [r10+56], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+72] setc r11b mov QWORD PTR [r10+64], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+80] setc r11b mov QWORD PTR [r10+72], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+88] setc r11b mov QWORD PTR [r10+80], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+96] setc r11b mov QWORD PTR [r10+88], r8 
xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+104] setc r11b mov QWORD PTR [r10+96], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+112] setc r11b mov QWORD PTR [r10+104], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+120] setc r11b mov QWORD PTR [r10+112], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+128] setc r11b mov QWORD PTR [r10+120], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+136] setc r11b mov QWORD PTR [r10+128], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+144] setc r11b mov QWORD PTR [r10+136], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+152] setc r11b mov QWORD PTR [r10+144], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+160] setc r11b mov QWORD PTR [r10+152], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+168] setc r11b mov QWORD PTR [r10+160], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+176] setc r11b mov QWORD PTR [r10+168], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+184] setc r11b mov QWORD PTR [r10+176], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+192] setc r11b mov QWORD PTR [r10+184], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+200] setc r11b mov QWORD PTR [r10+192], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+208] setc r11b mov QWORD PTR [r10+200], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+216] setc r11b mov QWORD PTR [r10+208], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+224] setc r11b mov QWORD PTR [r10+216], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+232] setc r11b mov QWORD PTR [r10+224], rax xor r8, r9 add r8, r11 mov rax, QWORD PTR [r10+240] setc r11b mov QWORD PTR [r10+232], r8 xor rax, r9 add rax, r11 mov r8, QWORD PTR [r10+248] setc r11b mov QWORD PTR [r10+240], rax xor r8, r9 add r8, r11 mov QWORD PTR [r10+248], r8 mov rdx, r10 mov rcx, rsp call sp_2048_sqr_avx2_32 mov rdx, QWORD PTR [rsp+520] mov rcx, QWORD PTR [rsp+512] add rdx, 256 add rcx, 512 call sp_2048_sqr_avx2_32 mov rdx, QWORD PTR [rsp+520] mov rcx, QWORD PTR [rsp+512] 
call sp_2048_sqr_avx2_32 IFDEF _WIN64 mov rdx, QWORD PTR [rsp+520] mov rcx, QWORD PTR [rsp+512] ENDIF mov rdx, QWORD PTR [rsp+512] lea r10, QWORD PTR [rsp+256] add rdx, 768 mov r9, 0 mov r8, QWORD PTR [r10+-256] sub r8, QWORD PTR [rdx+-256] mov rax, QWORD PTR [r10+-248] mov QWORD PTR [r10+-256], r8 sbb rax, QWORD PTR [rdx+-248] mov r8, QWORD PTR [r10+-240] mov QWORD PTR [r10+-248], rax sbb r8, QWORD PTR [rdx+-240] mov rax, QWORD PTR [r10+-232] mov QWORD PTR [r10+-240], r8 sbb rax, QWORD PTR [rdx+-232] mov r8, QWORD PTR [r10+-224] mov QWORD PTR [r10+-232], rax sbb r8, QWORD PTR [rdx+-224] mov rax, QWORD PTR [r10+-216] mov QWORD PTR [r10+-224], r8 sbb rax, QWORD PTR [rdx+-216] mov r8, QWORD PTR [r10+-208] mov QWORD PTR [r10+-216], rax sbb r8, QWORD PTR [rdx+-208] mov rax, QWORD PTR [r10+-200] mov QWORD PTR [r10+-208], r8 sbb rax, QWORD PTR [rdx+-200] mov r8, QWORD PTR [r10+-192] mov QWORD PTR [r10+-200], rax sbb r8, QWORD PTR [rdx+-192] mov rax, QWORD PTR [r10+-184] mov QWORD PTR [r10+-192], r8 sbb rax, QWORD PTR [rdx+-184] mov r8, QWORD PTR [r10+-176] mov QWORD PTR [r10+-184], rax sbb r8, QWORD PTR [rdx+-176] mov rax, QWORD PTR [r10+-168] mov QWORD PTR [r10+-176], r8 sbb rax, QWORD PTR [rdx+-168] mov r8, QWORD PTR [r10+-160] mov QWORD PTR [r10+-168], rax sbb r8, QWORD PTR [rdx+-160] mov rax, QWORD PTR [r10+-152] mov QWORD PTR [r10+-160], r8 sbb rax, QWORD PTR [rdx+-152] mov r8, QWORD PTR [r10+-144] mov QWORD PTR [r10+-152], rax sbb r8, QWORD PTR [rdx+-144] mov rax, QWORD PTR [r10+-136] mov QWORD PTR [r10+-144], r8 sbb rax, QWORD PTR [rdx+-136] mov r8, QWORD PTR [r10+-128] mov QWORD PTR [r10+-136], rax sbb r8, QWORD PTR [rdx+-128] mov rax, QWORD PTR [r10+-120] mov QWORD PTR [r10+-128], r8 sbb rax, QWORD PTR [rdx+-120] mov r8, QWORD PTR [r10+-112] mov QWORD PTR [r10+-120], rax sbb r8, QWORD PTR [rdx+-112] mov rax, QWORD PTR [r10+-104] mov QWORD PTR [r10+-112], r8 sbb rax, QWORD PTR [rdx+-104] mov r8, QWORD PTR [r10+-96] mov QWORD PTR [r10+-104], rax sbb r8, QWORD PTR 
[rdx+-96] mov rax, QWORD PTR [r10+-88] mov QWORD PTR [r10+-96], r8 sbb rax, QWORD PTR [rdx+-88] mov r8, QWORD PTR [r10+-80] mov QWORD PTR [r10+-88], rax sbb r8, QWORD PTR [rdx+-80] mov rax, QWORD PTR [r10+-72] mov QWORD PTR [r10+-80], r8 sbb rax, QWORD PTR [rdx+-72] mov r8, QWORD PTR [r10+-64] mov QWORD PTR [r10+-72], rax sbb r8, QWORD PTR [rdx+-64] mov rax, QWORD PTR [r10+-56] mov QWORD PTR [r10+-64], r8 sbb rax, QWORD PTR [rdx+-56] mov r8, QWORD PTR [r10+-48] mov QWORD PTR [r10+-56], rax sbb r8, QWORD PTR [rdx+-48] mov rax, QWORD PTR [r10+-40] mov QWORD PTR [r10+-48], r8 sbb rax, QWORD PTR [rdx+-40] mov r8, QWORD PTR [r10+-32] mov QWORD PTR [r10+-40], rax sbb r8, QWORD PTR [rdx+-32] mov rax, QWORD PTR [r10+-24] mov QWORD PTR [r10+-32], r8 sbb rax, QWORD PTR [rdx+-24] mov r8, QWORD PTR [r10+-16] mov QWORD PTR [r10+-24], rax sbb r8, QWORD PTR [rdx+-16] mov rax, QWORD PTR [r10+-8] mov QWORD PTR [r10+-16], r8 sbb rax, QWORD PTR [rdx+-8] mov r8, QWORD PTR [r10] mov QWORD PTR [r10+-8], rax sbb r8, QWORD PTR [rdx] mov rax, QWORD PTR [r10+8] mov QWORD PTR [r10], r8 sbb rax, QWORD PTR [rdx+8] mov r8, QWORD PTR [r10+16] mov QWORD PTR [r10+8], rax sbb r8, QWORD PTR [rdx+16] mov rax, QWORD PTR [r10+24] mov QWORD PTR [r10+16], r8 sbb rax, QWORD PTR [rdx+24] mov r8, QWORD PTR [r10+32] mov QWORD PTR [r10+24], rax sbb r8, QWORD PTR [rdx+32] mov rax, QWORD PTR [r10+40] mov QWORD PTR [r10+32], r8 sbb rax, QWORD PTR [rdx+40] mov r8, QWORD PTR [r10+48] mov QWORD PTR [r10+40], rax sbb r8, QWORD PTR [rdx+48] mov rax, QWORD PTR [r10+56] mov QWORD PTR [r10+48], r8 sbb rax, QWORD PTR [rdx+56] mov r8, QWORD PTR [r10+64] mov QWORD PTR [r10+56], rax sbb r8, QWORD PTR [rdx+64] mov rax, QWORD PTR [r10+72] mov QWORD PTR [r10+64], r8 sbb rax, QWORD PTR [rdx+72] mov r8, QWORD PTR [r10+80] mov QWORD PTR [r10+72], rax sbb r8, QWORD PTR [rdx+80] mov rax, QWORD PTR [r10+88] mov QWORD PTR [r10+80], r8 sbb rax, QWORD PTR [rdx+88] mov r8, QWORD PTR [r10+96] mov QWORD PTR [r10+88], rax sbb r8, QWORD PTR 
[rdx+96] mov rax, QWORD PTR [r10+104] mov QWORD PTR [r10+96], r8 sbb rax, QWORD PTR [rdx+104] mov r8, QWORD PTR [r10+112] mov QWORD PTR [r10+104], rax sbb r8, QWORD PTR [rdx+112] mov rax, QWORD PTR [r10+120] mov QWORD PTR [r10+112], r8 sbb rax, QWORD PTR [rdx+120] mov r8, QWORD PTR [r10+128] mov QWORD PTR [r10+120], rax sbb r8, QWORD PTR [rdx+128] mov rax, QWORD PTR [r10+136] mov QWORD PTR [r10+128], r8 sbb rax, QWORD PTR [rdx+136] mov r8, QWORD PTR [r10+144] mov QWORD PTR [r10+136], rax sbb r8, QWORD PTR [rdx+144] mov rax, QWORD PTR [r10+152] mov QWORD PTR [r10+144], r8 sbb rax, QWORD PTR [rdx+152] mov r8, QWORD PTR [r10+160] mov QWORD PTR [r10+152], rax sbb r8, QWORD PTR [rdx+160] mov rax, QWORD PTR [r10+168] mov QWORD PTR [r10+160], r8 sbb rax, QWORD PTR [rdx+168] mov r8, QWORD PTR [r10+176] mov QWORD PTR [r10+168], rax sbb r8, QWORD PTR [rdx+176] mov rax, QWORD PTR [r10+184] mov QWORD PTR [r10+176], r8 sbb rax, QWORD PTR [rdx+184] mov r8, QWORD PTR [r10+192] mov QWORD PTR [r10+184], rax sbb r8, QWORD PTR [rdx+192] mov rax, QWORD PTR [r10+200] mov QWORD PTR [r10+192], r8 sbb rax, QWORD PTR [rdx+200] mov r8, QWORD PTR [r10+208] mov QWORD PTR [r10+200], rax sbb r8, QWORD PTR [rdx+208] mov rax, QWORD PTR [r10+216] mov QWORD PTR [r10+208], r8 sbb rax, QWORD PTR [rdx+216] mov r8, QWORD PTR [r10+224] mov QWORD PTR [r10+216], rax sbb r8, QWORD PTR [rdx+224] mov rax, QWORD PTR [r10+232] mov QWORD PTR [r10+224], r8 sbb rax, QWORD PTR [rdx+232] mov r8, QWORD PTR [r10+240] mov QWORD PTR [r10+232], rax sbb r8, QWORD PTR [rdx+240] mov rax, QWORD PTR [r10+248] mov QWORD PTR [r10+240], r8 sbb rax, QWORD PTR [rdx+248] mov QWORD PTR [r10+248], rax sbb r9, 0 sub rdx, 512 mov r8, QWORD PTR [r10+-256] sub r8, QWORD PTR [rdx+-256] mov rax, QWORD PTR [r10+-248] mov QWORD PTR [r10+-256], r8 sbb rax, QWORD PTR [rdx+-248] mov r8, QWORD PTR [r10+-240] mov QWORD PTR [r10+-248], rax sbb r8, QWORD PTR [rdx+-240] mov rax, QWORD PTR [r10+-232] mov QWORD PTR [r10+-240], r8 sbb rax, QWORD PTR 
[rdx+-232] mov r8, QWORD PTR [r10+-224] mov QWORD PTR [r10+-232], rax sbb r8, QWORD PTR [rdx+-224] mov rax, QWORD PTR [r10+-216] mov QWORD PTR [r10+-224], r8 sbb rax, QWORD PTR [rdx+-216] mov r8, QWORD PTR [r10+-208] mov QWORD PTR [r10+-216], rax sbb r8, QWORD PTR [rdx+-208] mov rax, QWORD PTR [r10+-200] mov QWORD PTR [r10+-208], r8 sbb rax, QWORD PTR [rdx+-200] mov r8, QWORD PTR [r10+-192] mov QWORD PTR [r10+-200], rax sbb r8, QWORD PTR [rdx+-192] mov rax, QWORD PTR [r10+-184] mov QWORD PTR [r10+-192], r8 sbb rax, QWORD PTR [rdx+-184] mov r8, QWORD PTR [r10+-176] mov QWORD PTR [r10+-184], rax sbb r8, QWORD PTR [rdx+-176] mov rax, QWORD PTR [r10+-168] mov QWORD PTR [r10+-176], r8 sbb rax, QWORD PTR [rdx+-168] mov r8, QWORD PTR [r10+-160] mov QWORD PTR [r10+-168], rax sbb r8, QWORD PTR [rdx+-160] mov rax, QWORD PTR [r10+-152] mov QWORD PTR [r10+-160], r8 sbb rax, QWORD PTR [rdx+-152] mov r8, QWORD PTR [r10+-144] mov QWORD PTR [r10+-152], rax sbb r8, QWORD PTR [rdx+-144] mov rax, QWORD PTR [r10+-136] mov QWORD PTR [r10+-144], r8 sbb rax, QWORD PTR [rdx+-136] mov r8, QWORD PTR [r10+-128] mov QWORD PTR [r10+-136], rax sbb r8, QWORD PTR [rdx+-128] mov rax, QWORD PTR [r10+-120] mov QWORD PTR [r10+-128], r8 sbb rax, QWORD PTR [rdx+-120] mov r8, QWORD PTR [r10+-112] mov QWORD PTR [r10+-120], rax sbb r8, QWORD PTR [rdx+-112] mov rax, QWORD PTR [r10+-104] mov QWORD PTR [r10+-112], r8 sbb rax, QWORD PTR [rdx+-104] mov r8, QWORD PTR [r10+-96] mov QWORD PTR [r10+-104], rax sbb r8, QWORD PTR [rdx+-96] mov rax, QWORD PTR [r10+-88] mov QWORD PTR [r10+-96], r8 sbb rax, QWORD PTR [rdx+-88] mov r8, QWORD PTR [r10+-80] mov QWORD PTR [r10+-88], rax sbb r8, QWORD PTR [rdx+-80] mov rax, QWORD PTR [r10+-72] mov QWORD PTR [r10+-80], r8 sbb rax, QWORD PTR [rdx+-72] mov r8, QWORD PTR [r10+-64] mov QWORD PTR [r10+-72], rax sbb r8, QWORD PTR [rdx+-64] mov rax, QWORD PTR [r10+-56] mov QWORD PTR [r10+-64], r8 sbb rax, QWORD PTR [rdx+-56] mov r8, QWORD PTR [r10+-48] mov QWORD PTR [r10+-56], rax 
sbb r8, QWORD PTR [rdx+-48] mov rax, QWORD PTR [r10+-40] mov QWORD PTR [r10+-48], r8 sbb rax, QWORD PTR [rdx+-40] mov r8, QWORD PTR [r10+-32] mov QWORD PTR [r10+-40], rax sbb r8, QWORD PTR [rdx+-32] mov rax, QWORD PTR [r10+-24] mov QWORD PTR [r10+-32], r8 sbb rax, QWORD PTR [rdx+-24] mov r8, QWORD PTR [r10+-16] mov QWORD PTR [r10+-24], rax sbb r8, QWORD PTR [rdx+-16] mov rax, QWORD PTR [r10+-8] mov QWORD PTR [r10+-16], r8 sbb rax, QWORD PTR [rdx+-8] mov r8, QWORD PTR [r10] mov QWORD PTR [r10+-8], rax sbb r8, QWORD PTR [rdx] mov rax, QWORD PTR [r10+8] mov QWORD PTR [r10], r8 sbb rax, QWORD PTR [rdx+8] mov r8, QWORD PTR [r10+16] mov QWORD PTR [r10+8], rax sbb r8, QWORD PTR [rdx+16] mov rax, QWORD PTR [r10+24] mov QWORD PTR [r10+16], r8 sbb rax, QWORD PTR [rdx+24] mov r8, QWORD PTR [r10+32] mov QWORD PTR [r10+24], rax sbb r8, QWORD PTR [rdx+32] mov rax, QWORD PTR [r10+40] mov QWORD PTR [r10+32], r8 sbb rax, QWORD PTR [rdx+40] mov r8, QWORD PTR [r10+48] mov QWORD PTR [r10+40], rax sbb r8, QWORD PTR [rdx+48] mov rax, QWORD PTR [r10+56] mov QWORD PTR [r10+48], r8 sbb rax, QWORD PTR [rdx+56] mov r8, QWORD PTR [r10+64] mov QWORD PTR [r10+56], rax sbb r8, QWORD PTR [rdx+64] mov rax, QWORD PTR [r10+72] mov QWORD PTR [r10+64], r8 sbb rax, QWORD PTR [rdx+72] mov r8, QWORD PTR [r10+80] mov QWORD PTR [r10+72], rax sbb r8, QWORD PTR [rdx+80] mov rax, QWORD PTR [r10+88] mov QWORD PTR [r10+80], r8 sbb rax, QWORD PTR [rdx+88] mov r8, QWORD PTR [r10+96] mov QWORD PTR [r10+88], rax sbb r8, QWORD PTR [rdx+96] mov rax, QWORD PTR [r10+104] mov QWORD PTR [r10+96], r8 sbb rax, QWORD PTR [rdx+104] mov r8, QWORD PTR [r10+112] mov QWORD PTR [r10+104], rax sbb r8, QWORD PTR [rdx+112] mov rax, QWORD PTR [r10+120] mov QWORD PTR [r10+112], r8 sbb rax, QWORD PTR [rdx+120] mov r8, QWORD PTR [r10+128] mov QWORD PTR [r10+120], rax sbb r8, QWORD PTR [rdx+128] mov rax, QWORD PTR [r10+136] mov QWORD PTR [r10+128], r8 sbb rax, QWORD PTR [rdx+136] mov r8, QWORD PTR [r10+144] mov QWORD PTR [r10+136], rax 
sbb r8, QWORD PTR [rdx+144] mov rax, QWORD PTR [r10+152] mov QWORD PTR [r10+144], r8 sbb rax, QWORD PTR [rdx+152] mov r8, QWORD PTR [r10+160] mov QWORD PTR [r10+152], rax sbb r8, QWORD PTR [rdx+160] mov rax, QWORD PTR [r10+168] mov QWORD PTR [r10+160], r8 sbb rax, QWORD PTR [rdx+168] mov r8, QWORD PTR [r10+176] mov QWORD PTR [r10+168], rax sbb r8, QWORD PTR [rdx+176] mov rax, QWORD PTR [r10+184] mov QWORD PTR [r10+176], r8 sbb rax, QWORD PTR [rdx+184] mov r8, QWORD PTR [r10+192] mov QWORD PTR [r10+184], rax sbb r8, QWORD PTR [rdx+192] mov rax, QWORD PTR [r10+200] mov QWORD PTR [r10+192], r8 sbb rax, QWORD PTR [rdx+200] mov r8, QWORD PTR [r10+208] mov QWORD PTR [r10+200], rax sbb r8, QWORD PTR [rdx+208] mov rax, QWORD PTR [r10+216] mov QWORD PTR [r10+208], r8 sbb rax, QWORD PTR [rdx+216] mov r8, QWORD PTR [r10+224] mov QWORD PTR [r10+216], rax sbb r8, QWORD PTR [rdx+224] mov rax, QWORD PTR [r10+232] mov QWORD PTR [r10+224], r8 sbb rax, QWORD PTR [rdx+232] mov r8, QWORD PTR [r10+240] mov QWORD PTR [r10+232], rax sbb r8, QWORD PTR [rdx+240] mov rax, QWORD PTR [r10+248] mov QWORD PTR [r10+240], r8 sbb rax, QWORD PTR [rdx+248] mov QWORD PTR [r10+248], rax sbb r9, 0 mov rcx, QWORD PTR [rsp+512] neg r9 add rcx, 512 mov r8, QWORD PTR [rcx+-256] sub r8, QWORD PTR [r10+-256] mov rax, QWORD PTR [rcx+-248] mov QWORD PTR [rcx+-256], r8 sbb rax, QWORD PTR [r10+-248] mov r8, QWORD PTR [rcx+-240] mov QWORD PTR [rcx+-248], rax sbb r8, QWORD PTR [r10+-240] mov rax, QWORD PTR [rcx+-232] mov QWORD PTR [rcx+-240], r8 sbb rax, QWORD PTR [r10+-232] mov r8, QWORD PTR [rcx+-224] mov QWORD PTR [rcx+-232], rax sbb r8, QWORD PTR [r10+-224] mov rax, QWORD PTR [rcx+-216] mov QWORD PTR [rcx+-224], r8 sbb rax, QWORD PTR [r10+-216] mov r8, QWORD PTR [rcx+-208] mov QWORD PTR [rcx+-216], rax sbb r8, QWORD PTR [r10+-208] mov rax, QWORD PTR [rcx+-200] mov QWORD PTR [rcx+-208], r8 sbb rax, QWORD PTR [r10+-200] mov r8, QWORD PTR [rcx+-192] mov QWORD PTR [rcx+-200], rax sbb r8, QWORD PTR [r10+-192] mov 
rax, QWORD PTR [rcx+-184] mov QWORD PTR [rcx+-192], r8 sbb rax, QWORD PTR [r10+-184] mov r8, QWORD PTR [rcx+-176] mov QWORD PTR [rcx+-184], rax sbb r8, QWORD PTR [r10+-176] mov rax, QWORD PTR [rcx+-168] mov QWORD PTR [rcx+-176], r8 sbb rax, QWORD PTR [r10+-168] mov r8, QWORD PTR [rcx+-160] mov QWORD PTR [rcx+-168], rax sbb r8, QWORD PTR [r10+-160] mov rax, QWORD PTR [rcx+-152] mov QWORD PTR [rcx+-160], r8 sbb rax, QWORD PTR [r10+-152] mov r8, QWORD PTR [rcx+-144] mov QWORD PTR [rcx+-152], rax sbb r8, QWORD PTR [r10+-144] mov rax, QWORD PTR [rcx+-136] mov QWORD PTR [rcx+-144], r8 sbb rax, QWORD PTR [r10+-136] mov r8, QWORD PTR [rcx+-128] mov QWORD PTR [rcx+-136], rax sbb r8, QWORD PTR [r10+-128] mov rax, QWORD PTR [rcx+-120] mov QWORD PTR [rcx+-128], r8 sbb rax, QWORD PTR [r10+-120] mov r8, QWORD PTR [rcx+-112] mov QWORD PTR [rcx+-120], rax sbb r8, QWORD PTR [r10+-112] mov rax, QWORD PTR [rcx+-104] mov QWORD PTR [rcx+-112], r8 sbb rax, QWORD PTR [r10+-104] mov r8, QWORD PTR [rcx+-96] mov QWORD PTR [rcx+-104], rax sbb r8, QWORD PTR [r10+-96] mov rax, QWORD PTR [rcx+-88] mov QWORD PTR [rcx+-96], r8 sbb rax, QWORD PTR [r10+-88] mov r8, QWORD PTR [rcx+-80] mov QWORD PTR [rcx+-88], rax sbb r8, QWORD PTR [r10+-80] mov rax, QWORD PTR [rcx+-72] mov QWORD PTR [rcx+-80], r8 sbb rax, QWORD PTR [r10+-72] mov r8, QWORD PTR [rcx+-64] mov QWORD PTR [rcx+-72], rax sbb r8, QWORD PTR [r10+-64] mov rax, QWORD PTR [rcx+-56] mov QWORD PTR [rcx+-64], r8 sbb rax, QWORD PTR [r10+-56] mov r8, QWORD PTR [rcx+-48] mov QWORD PTR [rcx+-56], rax sbb r8, QWORD PTR [r10+-48] mov rax, QWORD PTR [rcx+-40] mov QWORD PTR [rcx+-48], r8 sbb rax, QWORD PTR [r10+-40] mov r8, QWORD PTR [rcx+-32] mov QWORD PTR [rcx+-40], rax sbb r8, QWORD PTR [r10+-32] mov rax, QWORD PTR [rcx+-24] mov QWORD PTR [rcx+-32], r8 sbb rax, QWORD PTR [r10+-24] mov r8, QWORD PTR [rcx+-16] mov QWORD PTR [rcx+-24], rax sbb r8, QWORD PTR [r10+-16] mov rax, QWORD PTR [rcx+-8] mov QWORD PTR [rcx+-16], r8 sbb rax, QWORD PTR [r10+-8] mov 
r8, QWORD PTR [rcx] mov QWORD PTR [rcx+-8], rax sbb r8, QWORD PTR [r10] mov rax, QWORD PTR [rcx+8] mov QWORD PTR [rcx], r8 sbb rax, QWORD PTR [r10+8] mov r8, QWORD PTR [rcx+16] mov QWORD PTR [rcx+8], rax sbb r8, QWORD PTR [r10+16] mov rax, QWORD PTR [rcx+24] mov QWORD PTR [rcx+16], r8 sbb rax, QWORD PTR [r10+24] mov r8, QWORD PTR [rcx+32] mov QWORD PTR [rcx+24], rax sbb r8, QWORD PTR [r10+32] mov rax, QWORD PTR [rcx+40] mov QWORD PTR [rcx+32], r8 sbb rax, QWORD PTR [r10+40] mov r8, QWORD PTR [rcx+48] mov QWORD PTR [rcx+40], rax sbb r8, QWORD PTR [r10+48] mov rax, QWORD PTR [rcx+56] mov QWORD PTR [rcx+48], r8 sbb rax, QWORD PTR [r10+56] mov r8, QWORD PTR [rcx+64] mov QWORD PTR [rcx+56], rax sbb r8, QWORD PTR [r10+64] mov rax, QWORD PTR [rcx+72] mov QWORD PTR [rcx+64], r8 sbb rax, QWORD PTR [r10+72] mov r8, QWORD PTR [rcx+80] mov QWORD PTR [rcx+72], rax sbb r8, QWORD PTR [r10+80] mov rax, QWORD PTR [rcx+88] mov QWORD PTR [rcx+80], r8 sbb rax, QWORD PTR [r10+88] mov r8, QWORD PTR [rcx+96] mov QWORD PTR [rcx+88], rax sbb r8, QWORD PTR [r10+96] mov rax, QWORD PTR [rcx+104] mov QWORD PTR [rcx+96], r8 sbb rax, QWORD PTR [r10+104] mov r8, QWORD PTR [rcx+112] mov QWORD PTR [rcx+104], rax sbb r8, QWORD PTR [r10+112] mov rax, QWORD PTR [rcx+120] mov QWORD PTR [rcx+112], r8 sbb rax, QWORD PTR [r10+120] mov r8, QWORD PTR [rcx+128] mov QWORD PTR [rcx+120], rax sbb r8, QWORD PTR [r10+128] mov rax, QWORD PTR [rcx+136] mov QWORD PTR [rcx+128], r8 sbb rax, QWORD PTR [r10+136] mov r8, QWORD PTR [rcx+144] mov QWORD PTR [rcx+136], rax sbb r8, QWORD PTR [r10+144] mov rax, QWORD PTR [rcx+152] mov QWORD PTR [rcx+144], r8 sbb rax, QWORD PTR [r10+152] mov r8, QWORD PTR [rcx+160] mov QWORD PTR [rcx+152], rax sbb r8, QWORD PTR [r10+160] mov rax, QWORD PTR [rcx+168] mov QWORD PTR [rcx+160], r8 sbb rax, QWORD PTR [r10+168] mov r8, QWORD PTR [rcx+176] mov QWORD PTR [rcx+168], rax sbb r8, QWORD PTR [r10+176] mov rax, QWORD PTR [rcx+184] mov QWORD PTR [rcx+176], r8 sbb rax, QWORD PTR [r10+184] mov 
r8, QWORD PTR [rcx+192] mov QWORD PTR [rcx+184], rax sbb r8, QWORD PTR [r10+192] mov rax, QWORD PTR [rcx+200] mov QWORD PTR [rcx+192], r8 sbb rax, QWORD PTR [r10+200] mov r8, QWORD PTR [rcx+208] mov QWORD PTR [rcx+200], rax sbb r8, QWORD PTR [r10+208] mov rax, QWORD PTR [rcx+216] mov QWORD PTR [rcx+208], r8 sbb rax, QWORD PTR [r10+216] mov r8, QWORD PTR [rcx+224] mov QWORD PTR [rcx+216], rax sbb r8, QWORD PTR [r10+224] mov rax, QWORD PTR [rcx+232] mov QWORD PTR [rcx+224], r8 sbb rax, QWORD PTR [r10+232] mov r8, QWORD PTR [rcx+240] mov QWORD PTR [rcx+232], rax sbb r8, QWORD PTR [r10+240] mov rax, QWORD PTR [rcx+248] mov QWORD PTR [rcx+240], r8 sbb rax, QWORD PTR [r10+248] mov QWORD PTR [rcx+248], rax sbb r9, 0 mov rcx, QWORD PTR [rsp+512] add rcx, 768 ; Add in word mov r8, QWORD PTR [rcx] add r8, r9 mov rax, QWORD PTR [rcx+8] mov QWORD PTR [rcx], r8 adc rax, 0 mov r8, QWORD PTR [rcx+16] mov QWORD PTR [rcx+8], rax adc r8, 0 mov rax, QWORD PTR [rcx+24] mov QWORD PTR [rcx+16], r8 adc rax, 0 mov r8, QWORD PTR [rcx+32] mov QWORD PTR [rcx+24], rax adc r8, 0 mov rax, QWORD PTR [rcx+40] mov QWORD PTR [rcx+32], r8 adc rax, 0 mov r8, QWORD PTR [rcx+48] mov QWORD PTR [rcx+40], rax adc r8, 0 mov rax, QWORD PTR [rcx+56] mov QWORD PTR [rcx+48], r8 adc rax, 0 mov r8, QWORD PTR [rcx+64] mov QWORD PTR [rcx+56], rax adc r8, 0 mov rax, QWORD PTR [rcx+72] mov QWORD PTR [rcx+64], r8 adc rax, 0 mov r8, QWORD PTR [rcx+80] mov QWORD PTR [rcx+72], rax adc r8, 0 mov rax, QWORD PTR [rcx+88] mov QWORD PTR [rcx+80], r8 adc rax, 0 mov r8, QWORD PTR [rcx+96] mov QWORD PTR [rcx+88], rax adc r8, 0 mov rax, QWORD PTR [rcx+104] mov QWORD PTR [rcx+96], r8 adc rax, 0 mov r8, QWORD PTR [rcx+112] mov QWORD PTR [rcx+104], rax adc r8, 0 mov rax, QWORD PTR [rcx+120] mov QWORD PTR [rcx+112], r8 adc rax, 0 mov r8, QWORD PTR [rcx+128] mov QWORD PTR [rcx+120], rax adc r8, 0 mov rax, QWORD PTR [rcx+136] mov QWORD PTR [rcx+128], r8 adc rax, 0 mov r8, QWORD PTR [rcx+144] mov QWORD PTR [rcx+136], rax adc r8, 0 mov 
rax, QWORD PTR [rcx+152] mov QWORD PTR [rcx+144], r8 adc rax, 0 mov r8, QWORD PTR [rcx+160] mov QWORD PTR [rcx+152], rax adc r8, 0 mov rax, QWORD PTR [rcx+168] mov QWORD PTR [rcx+160], r8 adc rax, 0 mov r8, QWORD PTR [rcx+176] mov QWORD PTR [rcx+168], rax adc r8, 0 mov rax, QWORD PTR [rcx+184] mov QWORD PTR [rcx+176], r8 adc rax, 0 mov r8, QWORD PTR [rcx+192] mov QWORD PTR [rcx+184], rax adc r8, 0 mov rax, QWORD PTR [rcx+200] mov QWORD PTR [rcx+192], r8 adc rax, 0 mov r8, QWORD PTR [rcx+208] mov QWORD PTR [rcx+200], rax adc r8, 0 mov rax, QWORD PTR [rcx+216] mov QWORD PTR [rcx+208], r8 adc rax, 0 mov r8, QWORD PTR [rcx+224] mov QWORD PTR [rcx+216], rax adc r8, 0 mov rax, QWORD PTR [rcx+232] mov QWORD PTR [rcx+224], r8 adc rax, 0 mov r8, QWORD PTR [rcx+240] mov QWORD PTR [rcx+232], rax adc r8, 0 mov rax, QWORD PTR [rcx+248] mov QWORD PTR [rcx+240], r8 adc rax, 0 mov QWORD PTR [rcx+248], rax mov rdx, QWORD PTR [rsp+520] mov rcx, QWORD PTR [rsp+512] add rsp, 528 ret sp_4096_sqr_avx2_64 ENDP _text ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision digit. 
; */
; Register use of sp_4096_mul_d_64, as read from the code below:
;   rcx  r  - destination; 65 words are stored ([rcx] .. [rcx+512]).
;   rdx  a  - source; copied to r9, 64 words are loaded ([r9] .. [r9+504]).
;   r8   b  - the digit; reloaded into rax ahead of every mul.
;   r10, r11, r12 rotate through the roles "word to store", "carry word"
;   and "word after that".  r12 is pushed on entry and popped before ret.
;   rax, rdx, r9, r10, r11 and the flags are overwritten and not restored.
;
; One interior step, for the limb at byte offset disp:
;   acc_lo is completed with the low product half and stored to r,
;   acc_hi takes the high product half plus the carry out of acc_lo,
;   acc_nx is zeroed first and then catches the carry out of acc_hi.
; The store sits between the add and the first adc; mov leaves the carry
; flag alone, so the chain is unaffected.
sp_4096_mul_d_64_limb MACRO disp, acc_lo, acc_hi, acc_nx
        mov     rax, r8
        xor     acc_nx, acc_nx
        mul     QWORD PTR [r9+disp]
        add     acc_lo, rax
        mov     QWORD PTR [rcx+disp], acc_lo
        adc     acc_hi, rdx
        adc     acc_nx, 0
ENDM
_text SEGMENT READONLY PARA
sp_4096_mul_d_64 PROC
        push    r12
        ; mul writes rdx:rax, so the pointer to a is moved out of rdx.
        mov     r9, rdx
        ; A[0] * B - opens the chain: r10 = low half (stored), r11 = high half
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[62] * B - one macro expansion per limb
        sp_4096_mul_d_64_limb 8, r11, r12, r10
        sp_4096_mul_d_64_limb 16, r12, r10, r11
        sp_4096_mul_d_64_limb 24, r10, r11, r12
        sp_4096_mul_d_64_limb 32, r11, r12, r10
        sp_4096_mul_d_64_limb 40, r12, r10, r11
        sp_4096_mul_d_64_limb 48, r10, r11, r12
        sp_4096_mul_d_64_limb 56, r11, r12, r10
        sp_4096_mul_d_64_limb 64, r12, r10, r11
        sp_4096_mul_d_64_limb 72, r10, r11, r12
        sp_4096_mul_d_64_limb 80, r11, r12, r10
        sp_4096_mul_d_64_limb 88, r12, r10, r11
        sp_4096_mul_d_64_limb 96, r10, r11, r12
        sp_4096_mul_d_64_limb 104, r11, r12, r10
        sp_4096_mul_d_64_limb 112, r12, r10, r11
        sp_4096_mul_d_64_limb 120, r10, r11, r12
        sp_4096_mul_d_64_limb 128, r11, r12, r10
        sp_4096_mul_d_64_limb 136, r12, r10, r11
        sp_4096_mul_d_64_limb 144, r10, r11, r12
        sp_4096_mul_d_64_limb 152, r11, r12, r10
        sp_4096_mul_d_64_limb 160, r12, r10, r11
        sp_4096_mul_d_64_limb 168, r10, r11, r12
        sp_4096_mul_d_64_limb 176, r11, r12, r10
        sp_4096_mul_d_64_limb 184, r12, r10, r11
        sp_4096_mul_d_64_limb 192, r10, r11, r12
        sp_4096_mul_d_64_limb 200, r11, r12, r10
        sp_4096_mul_d_64_limb 208, r12, r10, r11
        sp_4096_mul_d_64_limb 216, r10, r11, r12
        sp_4096_mul_d_64_limb 224, r11, r12, r10
        sp_4096_mul_d_64_limb 232, r12, r10, r11
        sp_4096_mul_d_64_limb 240, r10, r11, r12
        sp_4096_mul_d_64_limb 248, r11, r12, r10
        sp_4096_mul_d_64_limb 256, r12, r10, r11
        sp_4096_mul_d_64_limb 264, r10, r11, r12
        sp_4096_mul_d_64_limb 272, r11, r12, r10
        sp_4096_mul_d_64_limb 280, r12, r10, r11
        sp_4096_mul_d_64_limb 288, r10, r11, r12
        sp_4096_mul_d_64_limb 296, r11, r12, r10
        sp_4096_mul_d_64_limb 304, r12, r10, r11
        sp_4096_mul_d_64_limb 312, r10, r11, r12
        sp_4096_mul_d_64_limb 320, r11, r12, r10
        sp_4096_mul_d_64_limb 328, r12, r10, r11
        sp_4096_mul_d_64_limb 336, r10, r11, r12
        sp_4096_mul_d_64_limb 344, r11, r12, r10
        sp_4096_mul_d_64_limb 352, r12, r10, r11
        sp_4096_mul_d_64_limb 360, r10, r11, r12
        sp_4096_mul_d_64_limb 368, r11, r12, r10
        sp_4096_mul_d_64_limb 376, r12, r10, r11
        sp_4096_mul_d_64_limb 384, r10, r11, r12
        sp_4096_mul_d_64_limb 392, r11, r12, r10
        sp_4096_mul_d_64_limb 400, r12, r10, r11
        sp_4096_mul_d_64_limb 408, r10, r11, r12
        sp_4096_mul_d_64_limb 416, r11, r12, r10
        sp_4096_mul_d_64_limb 424, r12, r10, r11
        sp_4096_mul_d_64_limb 432, r10, r11, r12
        sp_4096_mul_d_64_limb 440, r11, r12, r10
        sp_4096_mul_d_64_limb 448, r12, r10, r11
        sp_4096_mul_d_64_limb 456, r10, r11, r12
        sp_4096_mul_d_64_limb 464, r11, r12, r10
        sp_4096_mul_d_64_limb 472, r12, r10, r11
        sp_4096_mul_d_64_limb 480, r10, r11, r12
        sp_4096_mul_d_64_limb 488, r11, r12, r10
        sp_4096_mul_d_64_limb 496, r12, r10, r11
        ; A[63] * B - closes the chain: no further carry word is kept, and
        ; both remaining words are written (r[63] and the extra word r[64]).
        mov     rax, r8
        mul     QWORD PTR [r9+504]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+504], r10
        mov     QWORD PTR [rcx+512], r11
        pop     r12
        ret
sp_4096_mul_d_64 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not copying.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_sub_64 PROC
        sub rsp, 512
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp], r10
        mov QWORD PTR [rsp+8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+16], r10
        mov QWORD PTR [rsp+24], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+32], r10
        mov QWORD PTR [rsp+40], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+48], r10
        mov QWORD PTR [rsp+56], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+64], r10
        mov QWORD PTR [rsp+72], r11
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+80], r10
        mov QWORD PTR [rsp+88], r11
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+96], r10
        mov QWORD PTR [rsp+104], r11
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+112], r10
        mov QWORD PTR [rsp+120], r11
        mov r10, QWORD PTR [r8+128]
        mov r11, QWORD PTR [r8+136]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+128], r10
        mov QWORD PTR [rsp+136], r11
        mov r10, QWORD PTR [r8+144]
        mov r11, QWORD PTR [r8+152]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+144], r10
        mov QWORD PTR [rsp+152], r11
        mov r10, QWORD PTR [r8+160]
        mov r11, QWORD PTR [r8+168]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+160], r10
        mov QWORD PTR [rsp+168], r11
mov r10, QWORD PTR [r8+176] mov r11, QWORD PTR [r8+184] and r10, r9 and r11, r9 mov QWORD PTR [rsp+176], r10 mov QWORD PTR [rsp+184], r11 mov r10, QWORD PTR [r8+192] mov r11, QWORD PTR [r8+200] and r10, r9 and r11, r9 mov QWORD PTR [rsp+192], r10 mov QWORD PTR [rsp+200], r11 mov r10, QWORD PTR [r8+208] mov r11, QWORD PTR [r8+216] and r10, r9 and r11, r9 mov QWORD PTR [rsp+208], r10 mov QWORD PTR [rsp+216], r11 mov r10, QWORD PTR [r8+224] mov r11, QWORD PTR [r8+232] and r10, r9 and r11, r9 mov QWORD PTR [rsp+224], r10 mov QWORD PTR [rsp+232], r11 mov r10, QWORD PTR [r8+240] mov r11, QWORD PTR [r8+248] and r10, r9 and r11, r9 mov QWORD PTR [rsp+240], r10 mov QWORD PTR [rsp+248], r11 mov r10, QWORD PTR [r8+256] mov r11, QWORD PTR [r8+264] and r10, r9 and r11, r9 mov QWORD PTR [rsp+256], r10 mov QWORD PTR [rsp+264], r11 mov r10, QWORD PTR [r8+272] mov r11, QWORD PTR [r8+280] and r10, r9 and r11, r9 mov QWORD PTR [rsp+272], r10 mov QWORD PTR [rsp+280], r11 mov r10, QWORD PTR [r8+288] mov r11, QWORD PTR [r8+296] and r10, r9 and r11, r9 mov QWORD PTR [rsp+288], r10 mov QWORD PTR [rsp+296], r11 mov r10, QWORD PTR [r8+304] mov r11, QWORD PTR [r8+312] and r10, r9 and r11, r9 mov QWORD PTR [rsp+304], r10 mov QWORD PTR [rsp+312], r11 mov r10, QWORD PTR [r8+320] mov r11, QWORD PTR [r8+328] and r10, r9 and r11, r9 mov QWORD PTR [rsp+320], r10 mov QWORD PTR [rsp+328], r11 mov r10, QWORD PTR [r8+336] mov r11, QWORD PTR [r8+344] and r10, r9 and r11, r9 mov QWORD PTR [rsp+336], r10 mov QWORD PTR [rsp+344], r11 mov r10, QWORD PTR [r8+352] mov r11, QWORD PTR [r8+360] and r10, r9 and r11, r9 mov QWORD PTR [rsp+352], r10 mov QWORD PTR [rsp+360], r11 mov r10, QWORD PTR [r8+368] mov r11, QWORD PTR [r8+376] and r10, r9 and r11, r9 mov QWORD PTR [rsp+368], r10 mov QWORD PTR [rsp+376], r11 mov r10, QWORD PTR [r8+384] mov r11, QWORD PTR [r8+392] and r10, r9 and r11, r9 mov QWORD PTR [rsp+384], r10 mov QWORD PTR [rsp+392], r11 mov r10, QWORD PTR [r8+400] mov r11, QWORD PTR [r8+408] and r10, r9 
and r11, r9 mov QWORD PTR [rsp+400], r10 mov QWORD PTR [rsp+408], r11 mov r10, QWORD PTR [r8+416] mov r11, QWORD PTR [r8+424] and r10, r9 and r11, r9 mov QWORD PTR [rsp+416], r10 mov QWORD PTR [rsp+424], r11 mov r10, QWORD PTR [r8+432] mov r11, QWORD PTR [r8+440] and r10, r9 and r11, r9 mov QWORD PTR [rsp+432], r10 mov QWORD PTR [rsp+440], r11 mov r10, QWORD PTR [r8+448] mov r11, QWORD PTR [r8+456] and r10, r9 and r11, r9 mov QWORD PTR [rsp+448], r10 mov QWORD PTR [rsp+456], r11 mov r10, QWORD PTR [r8+464] mov r11, QWORD PTR [r8+472] and r10, r9 and r11, r9 mov QWORD PTR [rsp+464], r10 mov QWORD PTR [rsp+472], r11 mov r10, QWORD PTR [r8+480] mov r11, QWORD PTR [r8+488] and r10, r9 and r11, r9 mov QWORD PTR [rsp+480], r10 mov QWORD PTR [rsp+488], r11 mov r10, QWORD PTR [r8+496] mov r11, QWORD PTR [r8+504] and r10, r9 and r11, r9 mov QWORD PTR [rsp+496], r10 mov QWORD PTR [rsp+504], r11 mov r10, QWORD PTR [rdx] mov r8, QWORD PTR [rsp] sub r10, r8 mov r11, QWORD PTR [rdx+8] mov r8, QWORD PTR [rsp+8] sbb r11, r8 mov QWORD PTR [rcx], r10 mov r10, QWORD PTR [rdx+16] mov r8, QWORD PTR [rsp+16] sbb r10, r8 mov QWORD PTR [rcx+8], r11 mov r11, QWORD PTR [rdx+24] mov r8, QWORD PTR [rsp+24] sbb r11, r8 mov QWORD PTR [rcx+16], r10 mov r10, QWORD PTR [rdx+32] mov r8, QWORD PTR [rsp+32] sbb r10, r8 mov QWORD PTR [rcx+24], r11 mov r11, QWORD PTR [rdx+40] mov r8, QWORD PTR [rsp+40] sbb r11, r8 mov QWORD PTR [rcx+32], r10 mov r10, QWORD PTR [rdx+48] mov r8, QWORD PTR [rsp+48] sbb r10, r8 mov QWORD PTR [rcx+40], r11 mov r11, QWORD PTR [rdx+56] mov r8, QWORD PTR [rsp+56] sbb r11, r8 mov QWORD PTR [rcx+48], r10 mov r10, QWORD PTR [rdx+64] mov r8, QWORD PTR [rsp+64] sbb r10, r8 mov QWORD PTR [rcx+56], r11 mov r11, QWORD PTR [rdx+72] mov r8, QWORD PTR [rsp+72] sbb r11, r8 mov QWORD PTR [rcx+64], r10 mov r10, QWORD PTR [rdx+80] mov r8, QWORD PTR [rsp+80] sbb r10, r8 mov QWORD PTR [rcx+72], r11 mov r11, QWORD PTR [rdx+88] mov r8, QWORD PTR [rsp+88] sbb r11, r8 mov QWORD PTR [rcx+80], r10 
mov r10, QWORD PTR [rdx+96] mov r8, QWORD PTR [rsp+96] sbb r10, r8 mov QWORD PTR [rcx+88], r11 mov r11, QWORD PTR [rdx+104] mov r8, QWORD PTR [rsp+104] sbb r11, r8 mov QWORD PTR [rcx+96], r10 mov r10, QWORD PTR [rdx+112] mov r8, QWORD PTR [rsp+112] sbb r10, r8 mov QWORD PTR [rcx+104], r11 mov r11, QWORD PTR [rdx+120] mov r8, QWORD PTR [rsp+120] sbb r11, r8 mov QWORD PTR [rcx+112], r10 mov r10, QWORD PTR [rdx+128] mov r8, QWORD PTR [rsp+128] sbb r10, r8 mov QWORD PTR [rcx+120], r11 mov r11, QWORD PTR [rdx+136] mov r8, QWORD PTR [rsp+136] sbb r11, r8 mov QWORD PTR [rcx+128], r10 mov r10, QWORD PTR [rdx+144] mov r8, QWORD PTR [rsp+144] sbb r10, r8 mov QWORD PTR [rcx+136], r11 mov r11, QWORD PTR [rdx+152] mov r8, QWORD PTR [rsp+152] sbb r11, r8 mov QWORD PTR [rcx+144], r10 mov r10, QWORD PTR [rdx+160] mov r8, QWORD PTR [rsp+160] sbb r10, r8 mov QWORD PTR [rcx+152], r11 mov r11, QWORD PTR [rdx+168] mov r8, QWORD PTR [rsp+168] sbb r11, r8 mov QWORD PTR [rcx+160], r10 mov r10, QWORD PTR [rdx+176] mov r8, QWORD PTR [rsp+176] sbb r10, r8 mov QWORD PTR [rcx+168], r11 mov r11, QWORD PTR [rdx+184] mov r8, QWORD PTR [rsp+184] sbb r11, r8 mov QWORD PTR [rcx+176], r10 mov r10, QWORD PTR [rdx+192] mov r8, QWORD PTR [rsp+192] sbb r10, r8 mov QWORD PTR [rcx+184], r11 mov r11, QWORD PTR [rdx+200] mov r8, QWORD PTR [rsp+200] sbb r11, r8 mov QWORD PTR [rcx+192], r10 mov r10, QWORD PTR [rdx+208] mov r8, QWORD PTR [rsp+208] sbb r10, r8 mov QWORD PTR [rcx+200], r11 mov r11, QWORD PTR [rdx+216] mov r8, QWORD PTR [rsp+216] sbb r11, r8 mov QWORD PTR [rcx+208], r10 mov r10, QWORD PTR [rdx+224] mov r8, QWORD PTR [rsp+224] sbb r10, r8 mov QWORD PTR [rcx+216], r11 mov r11, QWORD PTR [rdx+232] mov r8, QWORD PTR [rsp+232] sbb r11, r8 mov QWORD PTR [rcx+224], r10 mov r10, QWORD PTR [rdx+240] mov r8, QWORD PTR [rsp+240] sbb r10, r8 mov QWORD PTR [rcx+232], r11 mov r11, QWORD PTR [rdx+248] mov r8, QWORD PTR [rsp+248] sbb r11, r8 mov QWORD PTR [rcx+240], r10 mov r10, QWORD PTR [rdx+256] mov r8, QWORD 
PTR [rsp+256] sbb r10, r8 mov QWORD PTR [rcx+248], r11 mov r11, QWORD PTR [rdx+264] mov r8, QWORD PTR [rsp+264] sbb r11, r8 mov QWORD PTR [rcx+256], r10 mov r10, QWORD PTR [rdx+272] mov r8, QWORD PTR [rsp+272] sbb r10, r8 mov QWORD PTR [rcx+264], r11 mov r11, QWORD PTR [rdx+280] mov r8, QWORD PTR [rsp+280] sbb r11, r8 mov QWORD PTR [rcx+272], r10 mov r10, QWORD PTR [rdx+288] mov r8, QWORD PTR [rsp+288] sbb r10, r8 mov QWORD PTR [rcx+280], r11 mov r11, QWORD PTR [rdx+296] mov r8, QWORD PTR [rsp+296] sbb r11, r8 mov QWORD PTR [rcx+288], r10 mov r10, QWORD PTR [rdx+304] mov r8, QWORD PTR [rsp+304] sbb r10, r8 mov QWORD PTR [rcx+296], r11 mov r11, QWORD PTR [rdx+312] mov r8, QWORD PTR [rsp+312] sbb r11, r8 mov QWORD PTR [rcx+304], r10 mov r10, QWORD PTR [rdx+320] mov r8, QWORD PTR [rsp+320] sbb r10, r8 mov QWORD PTR [rcx+312], r11 mov r11, QWORD PTR [rdx+328] mov r8, QWORD PTR [rsp+328] sbb r11, r8 mov QWORD PTR [rcx+320], r10 mov r10, QWORD PTR [rdx+336] mov r8, QWORD PTR [rsp+336] sbb r10, r8 mov QWORD PTR [rcx+328], r11 mov r11, QWORD PTR [rdx+344] mov r8, QWORD PTR [rsp+344] sbb r11, r8 mov QWORD PTR [rcx+336], r10 mov r10, QWORD PTR [rdx+352] mov r8, QWORD PTR [rsp+352] sbb r10, r8 mov QWORD PTR [rcx+344], r11 mov r11, QWORD PTR [rdx+360] mov r8, QWORD PTR [rsp+360] sbb r11, r8 mov QWORD PTR [rcx+352], r10 mov r10, QWORD PTR [rdx+368] mov r8, QWORD PTR [rsp+368] sbb r10, r8 mov QWORD PTR [rcx+360], r11 mov r11, QWORD PTR [rdx+376] mov r8, QWORD PTR [rsp+376] sbb r11, r8 mov QWORD PTR [rcx+368], r10 mov r10, QWORD PTR [rdx+384] mov r8, QWORD PTR [rsp+384] sbb r10, r8 mov QWORD PTR [rcx+376], r11 mov r11, QWORD PTR [rdx+392] mov r8, QWORD PTR [rsp+392] sbb r11, r8 mov QWORD PTR [rcx+384], r10 mov r10, QWORD PTR [rdx+400] mov r8, QWORD PTR [rsp+400] sbb r10, r8 mov QWORD PTR [rcx+392], r11 mov r11, QWORD PTR [rdx+408] mov r8, QWORD PTR [rsp+408] sbb r11, r8 mov QWORD PTR [rcx+400], r10 mov r10, QWORD PTR [rdx+416] mov r8, QWORD PTR [rsp+416] sbb r10, r8 mov QWORD PTR 
[rcx+408], r11 mov r11, QWORD PTR [rdx+424] mov r8, QWORD PTR [rsp+424] sbb r11, r8 mov QWORD PTR [rcx+416], r10 mov r10, QWORD PTR [rdx+432] mov r8, QWORD PTR [rsp+432] sbb r10, r8 mov QWORD PTR [rcx+424], r11 mov r11, QWORD PTR [rdx+440] mov r8, QWORD PTR [rsp+440] sbb r11, r8 mov QWORD PTR [rcx+432], r10 mov r10, QWORD PTR [rdx+448] mov r8, QWORD PTR [rsp+448] sbb r10, r8 mov QWORD PTR [rcx+440], r11 mov r11, QWORD PTR [rdx+456] mov r8, QWORD PTR [rsp+456] sbb r11, r8 mov QWORD PTR [rcx+448], r10 mov r10, QWORD PTR [rdx+464] mov r8, QWORD PTR [rsp+464] sbb r10, r8 mov QWORD PTR [rcx+456], r11 mov r11, QWORD PTR [rdx+472] mov r8, QWORD PTR [rsp+472] sbb r11, r8 mov QWORD PTR [rcx+464], r10 mov r10, QWORD PTR [rdx+480] mov r8, QWORD PTR [rsp+480] sbb r10, r8 mov QWORD PTR [rcx+472], r11 mov r11, QWORD PTR [rdx+488] mov r8, QWORD PTR [rsp+488] sbb r11, r8 mov QWORD PTR [rcx+480], r10 mov r10, QWORD PTR [rdx+496] mov r8, QWORD PTR [rsp+496] sbb r10, r8 mov QWORD PTR [rcx+488], r11 mov r11, QWORD PTR [rdx+504] mov r8, QWORD PTR [rsp+504] sbb r11, r8 mov QWORD PTR [rcx+496], r10 mov QWORD PTR [rcx+504], r11 sbb rax, rax add rsp, 512 ret sp_4096_cond_sub_64 ENDP _text ENDS ; /* Reduce the number back to 4096 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. 
; */
; Register use in sp_4096_mont_reduce_64 (Microsoft x64 argument order):
;   rcx       a; advanced by one word (8 bytes) per outer iteration
;   rdx       m on entry; copied to r9 because mul overwrites rdx
;   r8        mp
;   r10       outer loop counter, 64 down to 0
;   r13       mu for the current outer iteration
;   r15, rdi  the two lowest live words of a, cached in registers
;   r11, r12  alternating carry words of the inner multiply-accumulate
;   r14       scratch for the word of a being updated
;   rsi       carry out of the top word; negated into the mask passed on
; rdi, rsi and r12-r15 are saved and restored around the body.
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_64 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 64
        mov     r10, 64
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_4096_mont_reduce_64_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        ; The low word of this sum is not stored; only its carry is kept.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu
        ; Result stays in r15: it is a[i] of the next outer iteration.
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu
        ; Result stays in rdi: it is a[i+1] of the next outer iteration.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+24]
        mov     r14, QWORD PTR [rcx+24]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+24], r14
        adc     r11, 0
        ; a[i+4] += m[4] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+32]
        mov     r14, QWORD PTR [rcx+32]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+32], r14
        adc     r12, 0
        ; a[i+5] += m[5] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+40]
        mov     r14, QWORD PTR [rcx+40]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+40], r14
        adc     r11, 0
        ; a[i+6] += m[6] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+48]
        mov     r14, QWORD PTR [rcx+48]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+48], r14
        adc     r12, 0
        ; a[i+7] += m[7] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+56]
        mov     r14, QWORD PTR [rcx+56]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+56], r14
        adc     r11, 0
        ; a[i+8] += m[8] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+64]
        mov     r14, QWORD PTR [rcx+64]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+64], r14
        adc     r12, 0
        ; a[i+9] += m[9] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+72]
        mov     r14, QWORD PTR [rcx+72]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+72], r14
        adc     r11, 0
        ; a[i+10] += m[10] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+80]
        mov     r14, QWORD PTR [rcx+80]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+80], r14
        adc     r12, 0
        ; a[i+11] += m[11] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+88]
        mov     r14, QWORD PTR [rcx+88]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+88], r14
        adc     r11, 0
        ; a[i+12] += m[12] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+96]
        mov     r14, QWORD PTR [rcx+96]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+96], r14
        adc     r12, 0
        ; a[i+13] += m[13] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+104]
        mov     r14, QWORD PTR [rcx+104]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+104], r14
        adc     r11, 0
        ; a[i+14] += m[14] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+112]
        mov     r14, QWORD PTR [rcx+112]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+112], r14
        adc     r12, 0
        ; a[i+15] += m[15] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     r11, 0
        ; a[i+16] += m[16] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+128]
        mov     r14, QWORD PTR [rcx+128]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+128], r14
        adc     r12, 0
        ; a[i+17] += m[17] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+136]
        mov     r14, QWORD PTR [rcx+136]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+136], r14
        adc     r11, 0
        ; a[i+18] += m[18] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+144]
        mov     r14, QWORD PTR [rcx+144]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+144], r14
        adc     r12, 0
        ; a[i+19] += m[19] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+152]
        mov     r14, QWORD PTR [rcx+152]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+152], r14
        adc     r11, 0
        ; a[i+20] += m[20] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+160]
        mov     r14, QWORD PTR [rcx+160]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+160], r14
        adc     r12, 0
        ; a[i+21] += m[21] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+168]
        mov     r14, QWORD PTR [rcx+168]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+168], r14
        adc     r11, 0
        ; a[i+22] += m[22] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+176]
        mov     r14, QWORD PTR [rcx+176]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+176], r14
        adc     r12, 0
        ; a[i+23] += m[23] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+184]
        mov     r14, QWORD PTR [rcx+184]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+184], r14
        adc     r11, 0
        ; a[i+24] += m[24] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+192]
        mov     r14, QWORD PTR [rcx+192]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+192], r14
        adc     r12, 0
        ; a[i+25] += m[25] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+200]
        mov     r14, QWORD PTR [rcx+200]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+200], r14
        adc     r11, 0
        ; a[i+26] += m[26] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+208]
        mov     r14, QWORD PTR [rcx+208]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+208], r14
        adc     r12, 0
        ; a[i+27] += m[27] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+216]
        mov     r14, QWORD PTR [rcx+216]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+216], r14
        adc     r11, 0
        ; a[i+28] += m[28] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+224]
        mov     r14, QWORD PTR [rcx+224]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+224], r14
        adc     r12, 0
        ; a[i+29] += m[29] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+232]
        mov     r14, QWORD PTR [rcx+232]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+232], r14
        adc     r11, 0
        ; a[i+30] += m[30] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+240]
        mov     r14, QWORD PTR [rcx+240]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+240], r14
        adc     r12, 0
        ; a[i+31] += m[31] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+248]
        mov     r14, QWORD PTR [rcx+248]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+248], r14
        adc     r11, 0
        ; a[i+32] += m[32] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+256]
        mov     r14, QWORD PTR [rcx+256]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+256], r14
        adc     r12, 0
        ; a[i+33] += m[33] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+264]
        mov     r14, QWORD PTR [rcx+264]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+264], r14
        adc     r11, 0
        ; a[i+34] += m[34] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+272]
        mov     r14, QWORD PTR [rcx+272]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+272], r14
        adc     r12, 0
        ; a[i+35] += m[35] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+280]
        mov     r14, QWORD PTR [rcx+280]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+280], r14
        adc     r11, 0
        ; a[i+36] += m[36] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+288]
        mov     r14, QWORD PTR [rcx+288]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+288], r14
        adc     r12, 0
        ; a[i+37] += m[37] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+296]
        mov     r14, QWORD PTR [rcx+296]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+296], r14
        adc     r11, 0
        ; a[i+38] += m[38] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+304]
        mov     r14, QWORD PTR [rcx+304]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+304], r14
        adc     r12, 0
        ; a[i+39] += m[39] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+312]
        mov     r14, QWORD PTR [rcx+312]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+312], r14
        adc     r11, 0
        ; a[i+40] += m[40] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+320]
        mov     r14, QWORD PTR [rcx+320]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+320], r14
        adc     r12, 0
        ; a[i+41] += m[41] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+328]
        mov     r14, QWORD PTR [rcx+328]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+328], r14
        adc     r11, 0
        ; a[i+42] += m[42] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+336]
        mov     r14, QWORD PTR [rcx+336]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+336], r14
        adc     r12, 0
        ; a[i+43] += m[43] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+344]
        mov     r14, QWORD PTR [rcx+344]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+344], r14
        adc     r11, 0
        ; a[i+44] += m[44] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+352]
        mov     r14, QWORD PTR [rcx+352]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+352], r14
        adc     r12, 0
        ; a[i+45] += m[45] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+360]
        mov     r14, QWORD PTR [rcx+360]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+360], r14
        adc     r11, 0
        ; a[i+46] += m[46] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+368]
        mov     r14, QWORD PTR [rcx+368]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+368], r14
        adc     r12, 0
        ; a[i+47] += m[47] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+376]
        mov     r14, QWORD PTR [rcx+376]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+376], r14
        adc     r11, 0
        ; a[i+48] += m[48] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+384]
        mov     r14, QWORD PTR [rcx+384]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+384], r14
        adc     r12, 0
        ; a[i+49] += m[49] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+392]
        mov     r14, QWORD PTR [rcx+392]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+392], r14
        adc     r11, 0
        ; a[i+50] += m[50] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+400]
        mov     r14, QWORD PTR [rcx+400]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+400], r14
        adc     r12, 0
        ; a[i+51] += m[51] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+408]
        mov     r14, QWORD PTR [rcx+408]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+408], r14
        adc     r11, 0
        ; a[i+52] += m[52] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+416]
        mov     r14, QWORD PTR [rcx+416]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+416], r14
        adc     r12, 0
        ; a[i+53] += m[53] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+424]
        mov     r14, QWORD PTR [rcx+424]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+424], r14
        adc     r11, 0
        ; a[i+54] += m[54] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+432]
        mov     r14, QWORD PTR [rcx+432]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+432], r14
        adc     r12, 0
        ; a[i+55] += m[55] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+440]
        mov     r14, QWORD PTR [rcx+440]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+440], r14
        adc     r11, 0
        ; a[i+56] += m[56] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+448]
        mov     r14, QWORD PTR [rcx+448]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+448], r14
        adc     r12, 0
        ; a[i+57] += m[57] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+456]
        mov     r14, QWORD PTR [rcx+456]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+456], r14
        adc     r11, 0
        ; a[i+58] += m[58] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+464]
        mov     r14, QWORD PTR [rcx+464]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+464], r14
        adc     r12, 0
        ; a[i+59] += m[59] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+472]
        mov     r14, QWORD PTR [rcx+472]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+472], r14
        adc     r11, 0
        ; a[i+60] += m[60] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+480]
        mov     r14, QWORD PTR [rcx+480]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+480], r14
        adc     r12, 0
        ; a[i+61] += m[61] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+488]
        mov     r14, QWORD PTR [rcx+488]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+488], r14
        adc     r11, 0
        ; a[i+62] += m[62] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+496]
        mov     r14, QWORD PTR [rcx+496]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+496], r14
        adc     r12, 0
        ; a[i+63] += m[63] * mu
        ; rsi enters holding the carry left by the previous outer iteration
        ; and leaves holding the carry out of a[i+64]. mov does not alter
        ; flags, so CF survives the reset of rsi.
        mov     rax, r13
        mul     QWORD PTR [r9+504]
        mov     r14, QWORD PTR [rcx+504]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+504], r14
        adc     QWORD PTR [rcx+512], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_4096_mont_reduce_64_loop
        ; rcx now points 512 bytes past the original a. Write the two cached
        ; words back to the start of that upper half.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Turn the final carry into the mask for the conditional subtract.
        neg     rsi
        ; Arguments for sp_4096_cond_sub_64(r, a, b, m). The file header
        ; always defines _WIN64, so the set-up is unconditional. The modulus
        ; pointer must be copied out of r9 before r9 is loaded with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 512
        call    sp_4096_cond_sub_64
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_4096_mont_reduce_64 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r  A single precision integer (64 words written).
; * a  A single precision integer.
; * b  A single precision integer.
; * returns 0 in rax when there is no borrow out of the top word and
; *         -1 (all bits set) when there is.
; */
; Each store is placed between the next load and the next sbb. mov does
; not alter flags, so the borrow chain is carried in CF from word 0 to 63.
_text SEGMENT READONLY PARA
sp_4096_sub_64 PROC
        mov     r9, QWORD PTR [rdx]
        sub     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        sbb     r10, QWORD PTR [r8+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r10
        sbb     r9, QWORD PTR [r8+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r9
        sbb     r10, QWORD PTR [r8+24]
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r10
        sbb     r9, QWORD PTR [r8+32]
        mov     r10, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r9
        sbb     r10, QWORD PTR [r8+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r10
        sbb     r9, QWORD PTR [r8+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r9
        sbb     r10, QWORD PTR [r8+56]
        mov     r9, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r10
        sbb     r9, QWORD PTR [r8+64]
        mov     r10, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r9
        sbb     r10, QWORD PTR [r8+72]
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r10
        sbb     r9, QWORD PTR [r8+80]
        mov     r10, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r9
        sbb     r10, QWORD PTR [r8+88]
        mov     r9, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+88], r10
        sbb     r9, QWORD PTR [r8+96]
        mov     r10, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+96], r9
        sbb     r10, QWORD PTR [r8+104]
        mov     r9, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+104], r10
        sbb     r9, QWORD PTR [r8+112]
        mov     r10, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r9
        sbb     r10, QWORD PTR [r8+120]
        mov     r9, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+120], r10
        sbb     r9, QWORD PTR [r8+128]
        mov     r10, QWORD PTR [rdx+136]
        mov     QWORD PTR [rcx+128], r9
        sbb     r10, QWORD PTR [r8+136]
        mov     r9, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+136], r10
        sbb     r9, QWORD PTR [r8+144]
        mov     r10, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+144], r9
        sbb     r10, QWORD PTR [r8+152]
        mov     r9, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+152], r10
        sbb     r9, QWORD PTR [r8+160]
        mov     r10, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+160], r9
        sbb     r10, QWORD PTR [r8+168]
        mov     r9, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+168], r10
        sbb     r9, QWORD PTR [r8+176]
        mov     r10, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+176], r9
        sbb     r10, QWORD PTR [r8+184]
        mov     r9, QWORD PTR [rdx+192]
        mov     QWORD PTR [rcx+184], r10
        sbb     r9, QWORD PTR [r8+192]
        mov     r10, QWORD PTR [rdx+200]
        mov     QWORD PTR [rcx+192], r9
        sbb     r10, QWORD PTR [r8+200]
        mov     r9, QWORD PTR [rdx+208]
        mov     QWORD PTR [rcx+200], r10
        sbb     r9, QWORD PTR [r8+208]
        mov     r10, QWORD PTR [rdx+216]
        mov     QWORD PTR [rcx+208], r9
        sbb     r10, QWORD PTR [r8+216]
        mov     r9, QWORD PTR [rdx+224]
        mov     QWORD PTR [rcx+216], r10
        sbb     r9, QWORD PTR [r8+224]
        mov     r10, QWORD PTR [rdx+232]
        mov     QWORD PTR [rcx+224], r9
        sbb     r10, QWORD PTR [r8+232]
        mov     r9, QWORD PTR [rdx+240]
        mov     QWORD PTR [rcx+232], r10
        sbb     r9, QWORD PTR [r8+240]
        mov     r10, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r9
        sbb     r10, QWORD PTR [r8+248]
        mov     r9, QWORD PTR [rdx+256]
        mov     QWORD PTR [rcx+248], r10
        sbb     r9, QWORD PTR [r8+256]
        mov     r10, QWORD PTR [rdx+264]
        mov     QWORD PTR [rcx+256], r9
        sbb     r10, QWORD PTR [r8+264]
        mov     r9, QWORD PTR [rdx+272]
        mov     QWORD PTR [rcx+264], r10
        sbb     r9, QWORD PTR [r8+272]
        mov     r10, QWORD PTR [rdx+280]
        mov     QWORD PTR [rcx+272], r9
        sbb     r10, QWORD PTR [r8+280]
        mov     r9, QWORD PTR [rdx+288]
        mov     QWORD PTR [rcx+280], r10
        sbb     r9, QWORD PTR [r8+288]
        mov     r10, QWORD PTR [rdx+296]
        mov     QWORD PTR [rcx+288], r9
        sbb     r10, QWORD PTR [r8+296]
        mov     r9, QWORD PTR [rdx+304]
        mov     QWORD PTR [rcx+296], r10
        sbb     r9, QWORD PTR [r8+304]
        mov     r10, QWORD PTR [rdx+312]
        mov     QWORD PTR [rcx+304], r9
        sbb     r10, QWORD PTR [r8+312]
        mov     r9, QWORD PTR [rdx+320]
        mov     QWORD PTR [rcx+312], r10
        sbb     r9, QWORD PTR [r8+320]
        mov     r10, QWORD PTR [rdx+328]
        mov     QWORD PTR [rcx+320], r9
        sbb     r10, QWORD PTR [r8+328]
        mov     r9, QWORD PTR [rdx+336]
        mov     QWORD PTR [rcx+328], r10
        sbb     r9, QWORD PTR [r8+336]
        mov     r10, QWORD PTR [rdx+344]
        mov     QWORD PTR [rcx+336], r9
        sbb     r10, QWORD PTR [r8+344]
        mov     r9, QWORD PTR [rdx+352]
        mov     QWORD PTR [rcx+344], r10
        sbb     r9, QWORD PTR [r8+352]
        mov     r10, QWORD PTR [rdx+360]
        mov     QWORD PTR [rcx+352], r9
        sbb     r10, QWORD PTR [r8+360]
        mov     r9, QWORD PTR [rdx+368]
        mov     QWORD PTR [rcx+360], r10
        sbb     r9, QWORD PTR [r8+368]
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        sbb     r10, QWORD PTR [r8+376]
        mov     r9, QWORD PTR [rdx+384]
        mov     QWORD PTR [rcx+376], r10
        sbb     r9, QWORD PTR [r8+384]
        mov     r10, QWORD PTR [rdx+392]
        mov     QWORD PTR [rcx+384], r9
        sbb     r10, QWORD PTR [r8+392]
        mov     r9, QWORD PTR [rdx+400]
        mov     QWORD PTR [rcx+392], r10
        sbb     r9, QWORD PTR [r8+400]
        mov     r10, QWORD PTR [rdx+408]
        mov     QWORD PTR [rcx+400], r9
        sbb     r10, QWORD PTR [r8+408]
        mov     r9, QWORD PTR [rdx+416]
        mov     QWORD PTR [rcx+408], r10
        sbb     r9, QWORD PTR [r8+416]
        mov     r10, QWORD PTR [rdx+424]
        mov     QWORD PTR [rcx+416], r9
        sbb     r10, QWORD PTR [r8+424]
        mov     r9, QWORD PTR [rdx+432]
        mov     QWORD PTR [rcx+424], r10
        sbb     r9, QWORD PTR [r8+432]
        mov     r10, QWORD PTR [rdx+440]
        mov     QWORD PTR [rcx+432], r9
        sbb     r10, QWORD PTR [r8+440]
        mov     r9, QWORD PTR [rdx+448]
        mov     QWORD PTR [rcx+440], r10
        sbb     r9, QWORD PTR [r8+448]
        mov     r10, QWORD PTR [rdx+456]
        mov     QWORD PTR [rcx+448], r9
        sbb     r10, QWORD PTR [r8+456]
        mov     r9, QWORD PTR [rdx+464]
        mov     QWORD PTR [rcx+456], r10
        sbb     r9, QWORD PTR [r8+464]
        mov     r10, QWORD PTR [rdx+472]
        mov     QWORD PTR [rcx+464], r9
        sbb     r10, QWORD PTR [r8+472]
        mov     r9, QWORD PTR [rdx+480]
        mov     QWORD PTR [rcx+472], r10
        sbb     r9, QWORD PTR [r8+480]
        mov     r10, QWORD PTR [rdx+488]
        mov     QWORD PTR [rcx+480], r9
        sbb     r10, QWORD PTR [r8+488]
        mov     r9, QWORD PTR [rdx+496]
        mov     QWORD PTR [rcx+488], r10
        sbb     r9, QWORD PTR [r8+496]
        mov     r10, QWORD PTR [rdx+504]
        mov     QWORD PTR [rcx+496], r9
        sbb     r10, QWORD PTR [r8+504]
        mov     QWORD PTR [rcx+504], r10
        ; rax = 0 - CF: 0 without a final borrow, -1 with one.
        sbb     rax, rax
        ret
sp_4096_sub_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; Register use in sp_4096_mul_d_avx2_64 (Microsoft x64 argument order):
;   rcx       r; 65 words are written, offsets 0 through 512
;   rax       a; moved out of rdx because mulx takes rdx as its implicit
;             multiplicand
;   rdx       b (copied from r8)
;   r13       constant zero; the xor that creates it also clears CF and OF
;   r9, r10   low and high halves of the current product
;   r11, r12  alternate between "word being completed" and "next word"
; Low halves are added through the CF chain (adcx) and high halves through
; the OF chain (adox), so the two carry chains never disturb each other.
; mov and mulx do not modify flags.
_text SEGMENT READONLY PARA
sp_4096_mul_d_avx2_64 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B
        mulx    r10, r9, QWORD PTR [rax+8]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+8], r12
        ; A[2] * B
        mulx    r10, r9, QWORD PTR [rax+16]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+16], r11
        ; A[3] * B
        mulx    r10, r9, QWORD PTR [rax+24]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+24], r12
        ; A[4] * B
        mulx    r10, r9, QWORD PTR [rax+32]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+32], r11
        ; A[5] * B
        mulx    r10, r9, QWORD PTR [rax+40]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+40], r12
        ; A[6] * B
        mulx    r10, r9, QWORD PTR [rax+48]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+48], r11
        ; A[7] * B
        mulx    r10, r9, QWORD PTR [rax+56]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+56], r12
        ; A[8] * B
        mulx    r10, r9, QWORD PTR [rax+64]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+64], r11
        ; A[9] * B
        mulx    r10, r9, QWORD PTR [rax+72]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+72], r12
        ; A[10] * B
        mulx    r10, r9, QWORD PTR [rax+80]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+80], r11
        ; A[11] * B
        mulx    r10, r9, QWORD PTR [rax+88]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+88], r12
        ; A[12] * B
        mulx    r10, r9, QWORD PTR [rax+96]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+96], r11
        ; A[13] * B
        mulx    r10, r9, QWORD PTR [rax+104]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+104], r12
        ; A[14] * B
        mulx    r10, r9, QWORD PTR [rax+112]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+112], r11
        ; A[15] * B
        mulx    r10, r9, QWORD PTR [rax+120]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+120], r12
        ; A[16] * B
        mulx    r10, r9, QWORD PTR [rax+128]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+128], r11
        ; A[17] * B
        mulx    r10, r9, QWORD PTR [rax+136]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+136], r12
        ; A[18] * B
        mulx    r10, r9, QWORD PTR [rax+144]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+144], r11
        ; A[19] * B
        mulx    r10, r9, QWORD PTR [rax+152]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+152], r12
        ; A[20] * B
        mulx    r10, r9, QWORD PTR [rax+160]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+160], r11
        ; A[21] * B
        mulx    r10, r9, QWORD PTR [rax+168]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+168], r12
        ; A[22] * B
        mulx    r10, r9, QWORD PTR [rax+176]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+176], r11
        ; A[23] * B
        mulx    r10, r9, QWORD PTR [rax+184]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+184], r12
        ; A[24] * B
        mulx    r10, r9, QWORD PTR [rax+192]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+192], r11
        ; A[25] * B
        mulx    r10, r9, QWORD PTR [rax+200]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+200], r12
        ; A[26] * B
        mulx    r10, r9, QWORD PTR [rax+208]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+208], r11
        ; A[27] * B
        mulx    r10, r9, QWORD PTR [rax+216]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+216], r12
        ; A[28] * B
        mulx    r10, r9, QWORD PTR [rax+224]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+224], r11
        ; A[29] * B
        mulx    r10, r9, QWORD PTR [rax+232]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+232], r12
        ; A[30] * B
        mulx    r10, r9, QWORD PTR [rax+240]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+240], r11
        ; A[31] * B
        mulx    r10, r9, QWORD PTR [rax+248]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+248], r12
        ; A[32] * B
        mulx    r10, r9, QWORD PTR [rax+256]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+256], r11
        ; A[33] * B
        mulx    r10, r9, QWORD PTR [rax+264]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+264], r12
        ; A[34] * B
        mulx    r10, r9, QWORD PTR [rax+272]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+272], r11
        ; A[35] * B
        mulx    r10, r9, QWORD PTR [rax+280]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+280], r12
        ; A[36] * B
        mulx    r10, r9, QWORD PTR [rax+288]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+288], r11
        ; A[37] * B
        mulx    r10, r9, QWORD PTR [rax+296]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+296], r12
        ; A[38] * B
        mulx    r10, r9, QWORD PTR [rax+304]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+304], r11
        ; A[39] * B
        mulx    r10, r9, QWORD PTR [rax+312]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+312], r12
        ; A[40] * B
        mulx    r10, r9, QWORD PTR [rax+320]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+320], r11
        ; A[41] * B
        mulx    r10, r9, QWORD PTR [rax+328]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+328], r12
        ; A[42] * B
        mulx    r10, r9, QWORD PTR [rax+336]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+336], r11
        ; A[43] * B
        mulx    r10, r9, QWORD PTR [rax+344]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+344], r12
        ; A[44] * B
        mulx    r10, r9, QWORD PTR [rax+352]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+352], r11
        ; A[45] * B
        mulx    r10, r9, QWORD PTR [rax+360]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+360], r12
        ; A[46] * B
        mulx    r10, r9, QWORD PTR [rax+368]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+368], r11
        ; A[47] * B
        mulx    r10, r9, QWORD PTR [rax+376]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+376], r12
        ; A[48] * B
        mulx    r10, r9, QWORD PTR [rax+384]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+384], r11
        ; A[49] * B
        mulx    r10, r9, QWORD PTR [rax+392]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+392], r12
        ; A[50] * B
        mulx    r10, r9, QWORD PTR [rax+400]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+400], r11
        ; A[51] * B
        mulx    r10, r9, QWORD PTR [rax+408]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+408], r12
        ; A[52] * B
        mulx    r10, r9, QWORD PTR [rax+416]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+416], r11
        ; A[53] * B
        mulx    r10, r9, QWORD PTR [rax+424]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+424], r12
        ; A[54] * B
        mulx    r10, r9, QWORD PTR [rax+432]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+432], r11
        ; A[55] * B
        mulx    r10, r9, QWORD PTR [rax+440]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+440], r12
        ; A[56] * B
        mulx    r10, r9, QWORD PTR [rax+448]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+448], r11
        ; A[57] * B
        mulx    r10, r9, QWORD PTR [rax+456]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+456], r12
        ; A[58] * B
        mulx    r10, r9, QWORD PTR [rax+464]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+464], r11
        ; A[59] * B
        mulx    r10, r9, QWORD PTR [rax+472]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+472], r12
        ; A[60] * B
        mulx    r10, r9, QWORD PTR [rax+480]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+480], r11
        ; A[61] * B
        mulx    r10, r9, QWORD PTR [rax+488]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+488], r12
        ; A[62] * B
        mulx    r10, r9, QWORD PTR [rax+496]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+496], r11
        ; A[63] * B
        mulx    r10, r9, QWORD PTR [rax+504]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; Fold the last CF-chain carry into the top word.
        adcx    r11, r13
        mov     QWORD PTR [rcx+504], r12
        mov     QWORD PTR [rcx+512], r11
        pop     r13
        pop     r12
        ret
sp_4096_mul_d_avx2_64 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the quotient of the division (in rax).
; *
; * The div instruction raises a divide error when div is zero or when the
; * quotient does not fit in 64 bits (d1 >= div). No check is made here;
; * presumably callers guarantee d1 < div - confirm against the C caller.
; */
_text SEGMENT READONLY PARA
div_4096_word_asm_64 PROC
        ; div divides rdx:rax by its operand: rax = d0, rdx = d1.
        ; d0 must be taken out of rdx before rdx is overwritten with d1.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_4096_word_asm_64 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r  (rcx) A single precision number representing condition subtract result.
; * a  (rdx) A single precision number to subtract from.
; * b  (r8)  A single precision number to subtract.
; * m  (r9)  Mask value to apply.
; * returns 0 in rax when the subtraction produced no final borrow and -1
; * when it did.
; *
; * All 64 limbs (offsets 0..504) are processed without branches. Each limb
; * of b is passed through PEXT with m as the selector, which gives the limb
; * itself for m = -1 and zero for m = 0. MOV and PEXT do not modify the
; * carry flag, so the borrow of each SUB/SBB reaches the following SBB even
; * though loads and stores are interleaved between them.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_sub_avx2_64 PROC
        push    r12
        ; Limb 0 starts the borrow chain with a plain SUB.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Limbs 1..63: the registers rotate with period three, so one
        ; repetition covers three limbs (21 * 3 = 63). Each step stores the
        ; result of the previous limb just before its own SBB.
        cs4096_off = 8
        REPT 21
        ; masked b in r12, result in r11, previous result in r10
        mov     r12, QWORD PTR [r8+cs4096_off]
        mov     r11, QWORD PTR [rdx+cs4096_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+cs4096_off-8], r10
        sbb     r11, r12
        ; masked b in r10, result in r12, previous result in r11
        mov     r10, QWORD PTR [r8+cs4096_off+8]
        mov     r12, QWORD PTR [rdx+cs4096_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+cs4096_off], r11
        sbb     r12, r10
        ; masked b in r11, result in r10, previous result in r12
        mov     r11, QWORD PTR [r8+cs4096_off+16]
        mov     r10, QWORD PTR [rdx+cs4096_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+cs4096_off+8], r12
        sbb     r10, r11
        cs4096_off = cs4096_off + 24
        ENDM
        ; Store the top limb, then turn the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+504], r10
        sbb     rax, rax
        pop     r12
        ret
sp_4096_cond_sub_avx2_64 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a  A single precision integer.
; * b  A single precision integer.
; * return  -ve, 0 or +ve if a is less than, equal to or greater than b
; *         respectively.
; *
; * a is read through rcx and b through rdx. The limbs are visited from the
; * most significant (offset 504) down to offset 0 with no branches:
; *   r8   mask, -1 while all limbs seen so far are equal, 0 afterwards
; *   rax  running result, starts at -1
; *   r9   constant 0, r10 constant 1
; * Both limbs are ANDed with the mask before the subtraction, so once a
; * difference has been found every later limb compares equal and cannot
; * change rax. The final XOR with the mask turns the initial -1 into 0 when
; * no limb differed and leaves 1 / -1 untouched otherwise.
; */
_text SEGMENT READONLY PARA
sp_4096_cmp_64 PROC
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        cmp4096_off = 504
        REPT 64
        mov     r11, QWORD PTR [rcx+cmp4096_off]
        mov     r12, QWORD PTR [rdx+cmp4096_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        ; a limb above b limb: result becomes 1
        cmova   rax, r10
        ; a limb below b limb: result becomes the mask (still -1 here)
        cmovc   rax, r8
        ; limbs differ: clear the mask so lower limbs are ignored
        cmovnz  r8, r9
        cmp4096_off = cmp4096_off - 8
        ENDM
        xor     rax, r8
        pop     r12
        ret
sp_4096_cmp_64 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA
sp_4096_get_from_table_64 PROC
        sub     rsp, 128
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        mov     rax, 1
        movd    xmm10, r8
        movd    xmm11, rax
        pxor    xmm13, xmm13
        pshufd  xmm11, xmm11, 0
        pshufd  xmm10, xmm10, 0
        ; START: 0-7
        pxor    xmm13, xmm13
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        pxor    xmm6, xmm6
        pxor    xmm7, xmm7
        ; ENTRY: 0
        mov     r9, QWORD PTR [rdx]
        movdqu  xmm12, xmm13
        pcmpeqd xmm12, xmm10
        movdqu  xmm0, OWORD PTR [r9]
        movdqu  xmm1, OWORD PTR [r9+16]
        movdqu  xmm2, OWORD PTR [r9+32]
        movdqu  xmm3, OWORD PTR [r9+48]
        pand    xmm0, xmm12
        pand    xmm1, xmm12
        pand    xmm2, xmm12
        pand    xmm3, xmm12
        por     xmm4, xmm0
        por     xmm5, xmm1
        por     xmm6, xmm2
        por     xmm7, xmm3
        paddd   xmm13, xmm11
        ; ENTRY: 1
        mov     r9, QWORD PTR
[rdx+8] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, 
xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand 
xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 0-7 ; START: 8-15 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 64 movdqu xmm12, 
xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand 
xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 64 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 8-15 ; START: 16-23 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, 
xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 movdqu xmm12, 
xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 
pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 16-23 ; START: 24-31 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand 
xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 192 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 192 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 24-31 ; START: 32-39 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 256 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 256 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 32-39 ; START: 40-47 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 320 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 320 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 40-47 ; START: 48-55 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 384 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 384 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 add rcx, 64 ; END: 48-55 ; START: 56-63 pxor xmm13, xmm13 pxor xmm4, xmm4 pxor xmm5, xmm5 pxor xmm6, xmm6 pxor xmm7, xmm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] 
pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 448 movdqu 
xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, 
xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 448 movdqu xmm12, xmm13 pcmpeqd xmm12, xmm10 movdqu xmm0, OWORD PTR [r9] movdqu xmm1, OWORD PTR [r9+16] movdqu xmm2, OWORD PTR [r9+32] movdqu xmm3, OWORD PTR [r9+48] pand xmm0, xmm12 pand xmm1, xmm12 pand xmm2, xmm12 pand xmm3, xmm12 por xmm4, xmm0 por xmm5, xmm1 por xmm6, xmm2 por xmm7, xmm3 paddd xmm13, xmm11 movdqu OWORD PTR [rcx], xmm4 movdqu OWORD PTR [rcx+16], xmm5 movdqu OWORD PTR [rcx+32], xmm6 movdqu OWORD PTR [rcx+48], xmm7 ; END: 56-63 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret sp_4096_get_from_table_64 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 4096 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. 
; */
; /* Register and data-flow notes for sp_4096_mont_reduce_avx2_64.
;  *
;  * Inputs, as consumed by the code below:
;  *   rcx  Pointer to a; copied to r9. Words a[0]..a[127] are accessed.
;  *   rdx  Pointer to m; copied to r10. Words m[0]..m[63] are read.
;  *   r8   mp; multiplied with a[i] to form mu in every round.
;  *
;  * Main loop (64 rounds, counted down in r11):
;  *   Each round adds m[0..63] * mu into a[i..i+64]. mulx does not modify
;  *   the flags, so two carry chains run side by side: adcx (CF) adds the
;  *   low product halves and adox (OF) adds the high product halves.
;  *   r14, r15, rdi, rsi hold a[i]..a[i+3] at the top of a round and are
;  *   shifted down by one word per round without going through memory.
;  *   r12 and r13 alternate as accumulators for the remaining words.
;  *   rbx is zeroed at the top of each round (this also clears CF and OF).
;  *   rbp holds the carry out of the previous round's top word.
;  *
;  * Final step:
;  *   neg rbp turns the last carry into a mask (0 stays 0, 1 becomes all
;  *   ones). pext with that mask yields either m[j] unchanged or zero, so
;  *   a[0..63] = a[64..127] - (m & mask) is computed without a branch.
;  *   NOTE(review): this relies on rbp being 0 or 1 after the loop - confirm.
;  *
;  * No call is made. The eight registers pushed in the prologue are popped
;  * in reverse order before ret.
;  */
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_avx2_64 PROC
        ; Save every register used below that is pushed/popped by this routine.
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        ; r9 = a, r10 = m (rcx and rdx are reused as scratch by mulx below).
        mov r9, rcx
        mov r10, rdx
        ; rbp = carry from the previous round; none yet.
        xor rbp, rbp
        ; i = 64
        mov r11, 64
        ; Keep the four lowest live words of a in registers.
        mov r14, QWORD PTR [r9]
        mov r15, QWORD PTR [r9+8]
        mov rdi, QWORD PTR [r9+16]
        mov rsi, QWORD PTR [r9+24]
        ; Bias the pointer: from here on a[i+k] lives at [r9 + 8*k - 256]
        ; (presumably to keep more displacements in the signed 8-bit range).
        add r9, 256
        ; rbp is already zero from the xor above; this second clear is redundant.
        xor rbp, rbp
L_4096_mont_reduce_avx2_64_loop:
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        ; rbx = 0 and CF = OF = 0: start of both carry chains.
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        ; The low-word sum lands in r12 and is never stored
        ; (presumably zero by construction of mu); only its carry is used.
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        ; rsi is reloaded with a[i+4]; it becomes a[(i+1)+3] for the next round.
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9+-224]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+-216]
        adcx rsi, rax
        adox r13, rcx
        ; a[i+5] += m[5] * mu
        ; From here on each step loads a[i+k+1], finishes a[i+k] and stores it.
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+-208]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-216], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9+-200]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-208], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9+-192]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-200], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+-184]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-192], r12
        ; a[i+9] += m[9] * mu
        mulx rcx, rax, QWORD PTR [r10+72]
        mov r12, QWORD PTR [r9+-176]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-184], r13
        ; a[i+10] += m[10] * mu
        mulx rcx, rax, QWORD PTR [r10+80]
        mov r13, QWORD PTR [r9+-168]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-176], r12
        ; a[i+11] += m[11] * mu
        mulx rcx, rax, QWORD PTR [r10+88]
        mov r12, QWORD PTR [r9+-160]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-168], r13
        ; a[i+12] += m[12] * mu
        mulx rcx, rax, QWORD PTR [r10+96]
        mov r13, QWORD PTR [r9+-152]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-160], r12
        ; a[i+13] += m[13] * mu
        mulx rcx, rax, QWORD PTR [r10+104]
        mov r12, QWORD PTR [r9+-144]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-152], r13
        ; a[i+14] += m[14] * mu
        mulx rcx, rax, QWORD PTR [r10+112]
        mov r13, QWORD PTR [r9+-136]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-144], r12
        ; a[i+15] += m[15] * mu
        mulx rcx, rax, QWORD PTR [r10+120]
        mov r12, QWORD PTR [r9+-128]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-136], r13
        ; a[i+16] += m[16] * mu
        mulx rcx, rax, QWORD PTR [r10+128]
        mov r13, QWORD PTR [r9+-120]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-128], r12
        ; a[i+17] += m[17] * mu
        mulx rcx, rax, QWORD PTR [r10+136]
        mov r12, QWORD PTR [r9+-112]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-120], r13
        ; a[i+18] += m[18] * mu
        mulx rcx, rax, QWORD PTR [r10+144]
        mov r13, QWORD PTR [r9+-104]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-112], r12
        ; a[i+19] += m[19] * mu
        mulx rcx, rax, QWORD PTR [r10+152]
        mov r12, QWORD PTR [r9+-96]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-104], r13
        ; a[i+20] += m[20] * mu
        mulx rcx, rax, QWORD PTR [r10+160]
        mov r13, QWORD PTR [r9+-88]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-96], r12
        ; a[i+21] += m[21] * mu
        mulx rcx, rax, QWORD PTR [r10+168]
        mov r12, QWORD PTR [r9+-80]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-88], r13
        ; a[i+22] += m[22] * mu
        mulx rcx, rax, QWORD PTR [r10+176]
        mov r13, QWORD PTR [r9+-72]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-80], r12
        ; a[i+23] += m[23] * mu
        mulx rcx, rax, QWORD PTR [r10+184]
        mov r12, QWORD PTR [r9+-64]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-72], r13
        ; a[i+24] += m[24] * mu
        mulx rcx, rax, QWORD PTR [r10+192]
        mov r13, QWORD PTR [r9+-56]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-64], r12
        ; a[i+25] += m[25] * mu
        mulx rcx, rax, QWORD PTR [r10+200]
        mov r12, QWORD PTR [r9+-48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-56], r13
        ; a[i+26] += m[26] * mu
        mulx rcx, rax, QWORD PTR [r10+208]
        mov r13, QWORD PTR [r9+-40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-48], r12
        ; a[i+27] += m[27] * mu
        mulx rcx, rax, QWORD PTR [r10+216]
        mov r12, QWORD PTR [r9+-32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-40], r13
        ; a[i+28] += m[28] * mu
        mulx rcx, rax, QWORD PTR [r10+224]
        mov r13, QWORD PTR [r9+-24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-32], r12
        ; a[i+29] += m[29] * mu
        mulx rcx, rax, QWORD PTR [r10+232]
        mov r12, QWORD PTR [r9+-16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-24], r13
        ; a[i+30] += m[30] * mu
        mulx rcx, rax, QWORD PTR [r10+240]
        mov r13, QWORD PTR [r9+-8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-16], r12
        ; a[i+31] += m[31] * mu
        mulx rcx, rax, QWORD PTR [r10+248]
        mov r12, QWORD PTR [r9]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-8], r13
        ; a[i+32] += m[32] * mu
        mulx rcx, rax, QWORD PTR [r10+256]
        mov r13, QWORD PTR [r9+8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9], r12
        ; a[i+33] += m[33] * mu
        mulx rcx, rax, QWORD PTR [r10+264]
        mov r12, QWORD PTR [r9+16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+8], r13
        ; a[i+34] += m[34] * mu
        mulx rcx, rax, QWORD PTR [r10+272]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+35] += m[35] * mu
        mulx rcx, rax, QWORD PTR [r10+280]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+36] += m[36] * mu
        mulx rcx, rax, QWORD PTR [r10+288]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        ; a[i+37] += m[37] * mu
        mulx rcx, rax, QWORD PTR [r10+296]
        mov r12, QWORD PTR [r9+48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+40], r13
        ; a[i+38] += m[38] * mu
        mulx rcx, rax, QWORD PTR [r10+304]
        mov r13, QWORD PTR [r9+56]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+48], r12
        ; a[i+39] += m[39] * mu
        mulx rcx, rax, QWORD PTR [r10+312]
        mov r12, QWORD PTR [r9+64]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+56], r13
        ; a[i+40] += m[40] * mu
        mulx rcx, rax, QWORD PTR [r10+320]
        mov r13, QWORD PTR [r9+72]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+64], r12
        ; a[i+41] += m[41] * mu
        mulx rcx, rax, QWORD PTR [r10+328]
        mov r12, QWORD PTR [r9+80]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+72], r13
        ; a[i+42] += m[42] * mu
        mulx rcx, rax, QWORD PTR [r10+336]
        mov r13, QWORD PTR [r9+88]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+80], r12
        ; a[i+43] += m[43] * mu
        mulx rcx, rax, QWORD PTR [r10+344]
        mov r12, QWORD PTR [r9+96]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+88], r13
        ; a[i+44] += m[44] * mu
        mulx rcx, rax, QWORD PTR [r10+352]
        mov r13, QWORD PTR [r9+104]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+96], r12
        ; a[i+45] += m[45] * mu
        mulx rcx, rax, QWORD PTR [r10+360]
        mov r12, QWORD PTR [r9+112]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+104], r13
        ; a[i+46] += m[46] * mu
        mulx rcx, rax, QWORD PTR [r10+368]
        mov r13, QWORD PTR [r9+120]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+112], r12
        ; a[i+47] += m[47] * mu
        mulx rcx, rax, QWORD PTR [r10+376]
        mov r12, QWORD PTR [r9+128]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+120], r13
        ; a[i+48] += m[48] * mu
        mulx rcx, rax, QWORD PTR [r10+384]
        mov r13, QWORD PTR [r9+136]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+128], r12
        ; a[i+49] += m[49] * mu
        mulx rcx, rax, QWORD PTR [r10+392]
        mov r12, QWORD PTR [r9+144]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+136], r13
        ; a[i+50] += m[50] * mu
        mulx rcx, rax, QWORD PTR [r10+400]
        mov r13, QWORD PTR [r9+152]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+144], r12
        ; a[i+51] += m[51] * mu
        mulx rcx, rax, QWORD PTR [r10+408]
        mov r12, QWORD PTR [r9+160]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+152], r13
        ; a[i+52] += m[52] * mu
        mulx rcx, rax, QWORD PTR [r10+416]
        mov r13, QWORD PTR [r9+168]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+160], r12
        ; a[i+53] += m[53] * mu
        mulx rcx, rax, QWORD PTR [r10+424]
        mov r12, QWORD PTR [r9+176]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+168], r13
        ; a[i+54] += m[54] * mu
        mulx rcx, rax, QWORD PTR [r10+432]
        mov r13, QWORD PTR [r9+184]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+176], r12
        ; a[i+55] += m[55] * mu
        mulx rcx, rax, QWORD PTR [r10+440]
        mov r12, QWORD PTR [r9+192]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+184], r13
        ; a[i+56] += m[56] * mu
        mulx rcx, rax, QWORD PTR [r10+448]
        mov r13, QWORD PTR [r9+200]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+192], r12
        ; a[i+57] += m[57] * mu
        mulx rcx, rax, QWORD PTR [r10+456]
        mov r12, QWORD PTR [r9+208]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+200], r13
        ; a[i+58] += m[58] * mu
        mulx rcx, rax, QWORD PTR [r10+464]
        mov r13, QWORD PTR [r9+216]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+208], r12
        ; a[i+59] += m[59] * mu
        mulx rcx, rax, QWORD PTR [r10+472]
        mov r12, QWORD PTR [r9+224]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+216], r13
        ; a[i+60] += m[60] * mu
        mulx rcx, rax, QWORD PTR [r10+480]
        mov r13, QWORD PTR [r9+232]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+224], r12
        ; a[i+61] += m[61] * mu
        mulx rcx, rax, QWORD PTR [r10+488]
        mov r12, QWORD PTR [r9+240]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+232], r13
        ; a[i+62] += m[62] * mu
        mulx rcx, rax, QWORD PTR [r10+496]
        mov r13, QWORD PTR [r9+248]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+240], r12
        ; a[i+63] += m[63] * mu
        ; r12 is loaded with a[i+64], which receives the final high half.
        mulx rcx, rax, QWORD PTR [r10+504]
        mov r12, QWORD PTR [r9+256]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+248], r13
        ; a[i+64] += carry from the previous round (plus CF of this round).
        adcx r12, rbp
        ; rbp = 0; mov leaves CF and OF untouched.
        mov rbp, rbx
        mov QWORD PTR [r9+256], r12
        ; Collect what is left in both carry chains: rbp = OF + CF.
        adox rbp, rbx
        adcx rbp, rbx
        ; a += 1
        add r9, 8
        ; i -= 1
        sub r11, 1
        jnz L_4096_mont_reduce_avx2_64_loop
        ; r9 advanced by 64 * 8 = 512 bytes on top of the 256-byte bias;
        ; removing the bias leaves r9 = &a[64].
        sub r9, 256
        ; Final carry -> mask for the conditional subtract (0, or all ones for 1).
        neg rbp
        ; r8 = &a[64] (source words; mp is no longer needed), r9 = &a[0] (result).
        mov r8, r9
        sub r9, 512
        ; a[j] = a[64+j] - (m[j] & mask), j = 0..63, borrow carried through sbb.
        ; rcx/rax/rdx rotate roles from step to step; the store of each result
        ; word is deferred into the following step (mov does not alter CF).
        ; word 0 (a[64] is still in r14)
        mov rcx, QWORD PTR [r10]
        mov rdx, r14
        pext rcx, rcx, rbp
        sub rdx, rcx
        ; word 1 (a[65] is still in r15)
        mov rcx, QWORD PTR [r10+8]
        mov rax, r15
        pext rcx, rcx, rbp
        mov QWORD PTR [r9], rdx
        sbb rax, rcx
        ; word 2 (a[66] is still in rdi)
        mov rdx, QWORD PTR [r10+16]
        mov rcx, rdi
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+8], rax
        sbb rcx, rdx
        ; word 3 (a[67] is still in rsi)
        mov rax, QWORD PTR [r10+24]
        mov rdx, rsi
        pext rax, rax, rbp
        mov QWORD PTR [r9+16], rcx
        sbb rdx, rax
        ; word 4 (from here on the source word is read from memory via r8)
        mov rcx, QWORD PTR [r10+32]
        mov rax, QWORD PTR [r8+32]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+24], rdx
        sbb rax, rcx
        ; word 5
        mov rdx, QWORD PTR [r10+40]
        mov rcx, QWORD PTR [r8+40]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+32], rax
        sbb rcx, rdx
        ; word 6
        mov rax, QWORD PTR [r10+48]
        mov rdx, QWORD PTR [r8+48]
        pext rax, rax, rbp
        mov QWORD PTR [r9+40], rcx
        sbb rdx, rax
        ; word 7
        mov rcx, QWORD PTR [r10+56]
        mov rax, QWORD PTR [r8+56]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+48], rdx
        sbb rax, rcx
        ; word 8
        mov rdx, QWORD PTR [r10+64]
        mov rcx, QWORD PTR [r8+64]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+56], rax
        sbb rcx, rdx
        ; word 9
        mov rax, QWORD PTR [r10+72]
        mov rdx, QWORD PTR [r8+72]
        pext rax, rax, rbp
        mov QWORD PTR [r9+64], rcx
        sbb rdx, rax
        ; word 10
        mov rcx, QWORD PTR [r10+80]
        mov rax, QWORD PTR [r8+80]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+72], rdx
        sbb rax, rcx
        ; word 11
        mov rdx, QWORD PTR [r10+88]
        mov rcx, QWORD PTR [r8+88]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+80], rax
        sbb rcx, rdx
        ; word 12
        mov rax, QWORD PTR [r10+96]
        mov rdx, QWORD PTR [r8+96]
        pext rax, rax, rbp
        mov QWORD PTR [r9+88], rcx
        sbb rdx, rax
        ; word 13
        mov rcx, QWORD PTR [r10+104]
        mov rax, QWORD PTR [r8+104]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+96], rdx
        sbb rax, rcx
        ; word 14
        mov rdx, QWORD PTR [r10+112]
        mov rcx, QWORD PTR [r8+112]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+104], rax
        sbb rcx, rdx
        ; word 15
        mov rax, QWORD PTR [r10+120]
        mov rdx, QWORD PTR [r8+120]
        pext rax, rax, rbp
        mov QWORD PTR [r9+112], rcx
        sbb rdx, rax
        ; word 16
        mov rcx, QWORD PTR [r10+128]
        mov rax, QWORD PTR [r8+128]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+120], rdx
        sbb rax, rcx
        ; word 17
        mov rdx, QWORD PTR [r10+136]
        mov rcx, QWORD PTR [r8+136]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+128], rax
        sbb rcx, rdx
        ; word 18
        mov rax, QWORD PTR [r10+144]
        mov rdx, QWORD PTR [r8+144]
        pext rax, rax, rbp
        mov QWORD PTR [r9+136], rcx
        sbb rdx, rax
        ; word 19
        mov rcx, QWORD PTR [r10+152]
        mov rax, QWORD PTR [r8+152]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+144], rdx
        sbb rax, rcx
        ; word 20
        mov rdx, QWORD PTR [r10+160]
        mov rcx, QWORD PTR [r8+160]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+152], rax
        sbb rcx, rdx
        ; word 21
        mov rax, QWORD PTR [r10+168]
        mov rdx, QWORD PTR [r8+168]
        pext rax, rax, rbp
        mov QWORD PTR [r9+160], rcx
        sbb rdx, rax
        ; word 22
        mov rcx, QWORD PTR [r10+176]
        mov rax, QWORD PTR [r8+176]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+168], rdx
        sbb rax, rcx
        ; word 23
        mov rdx, QWORD PTR [r10+184]
        mov rcx, QWORD PTR [r8+184]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+176], rax
        sbb rcx, rdx
        ; word 24
        mov rax, QWORD PTR [r10+192]
        mov rdx, QWORD PTR [r8+192]
        pext rax, rax, rbp
        mov QWORD PTR [r9+184], rcx
        sbb rdx, rax
        ; word 25
        mov rcx, QWORD PTR [r10+200]
        mov rax, QWORD PTR [r8+200]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+192], rdx
        sbb rax, rcx
        ; word 26
        mov rdx, QWORD PTR [r10+208]
        mov rcx, QWORD PTR [r8+208]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+200], rax
        sbb rcx, rdx
        ; word 27
        mov rax, QWORD PTR [r10+216]
        mov rdx, QWORD PTR [r8+216]
        pext rax, rax, rbp
        mov QWORD PTR [r9+208], rcx
        sbb rdx, rax
        ; word 28
        mov rcx, QWORD PTR [r10+224]
        mov rax, QWORD PTR [r8+224]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+216], rdx
        sbb rax, rcx
        ; word 29
        mov rdx, QWORD PTR [r10+232]
        mov rcx, QWORD PTR [r8+232]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+224], rax
        sbb rcx, rdx
        ; word 30
        mov rax, QWORD PTR [r10+240]
        mov rdx, QWORD PTR [r8+240]
        pext rax, rax, rbp
        mov QWORD PTR [r9+232], rcx
        sbb rdx, rax
        ; word 31
        mov rcx, QWORD PTR [r10+248]
        mov rax, QWORD PTR [r8+248]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+240], rdx
        sbb rax, rcx
        ; word 32
        mov rdx, QWORD PTR [r10+256]
        mov rcx, QWORD PTR [r8+256]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+248], rax
        sbb rcx, rdx
        ; word 33
        mov rax, QWORD PTR [r10+264]
        mov rdx, QWORD PTR [r8+264]
        pext rax, rax, rbp
        mov QWORD PTR [r9+256], rcx
        sbb rdx, rax
        ; word 34
        mov rcx, QWORD PTR [r10+272]
        mov rax, QWORD PTR [r8+272]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+264], rdx
        sbb rax, rcx
        ; word 35
        mov rdx, QWORD PTR [r10+280]
        mov rcx, QWORD PTR [r8+280]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+272], rax
        sbb rcx, rdx
        ; word 36
        mov rax, QWORD PTR [r10+288]
        mov rdx, QWORD PTR [r8+288]
        pext rax, rax, rbp
        mov QWORD PTR [r9+280], rcx
        sbb rdx, rax
        ; word 37
        mov rcx, QWORD PTR [r10+296]
        mov rax, QWORD PTR [r8+296]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+288], rdx
        sbb rax, rcx
        ; word 38
        mov rdx, QWORD PTR [r10+304]
        mov rcx, QWORD PTR [r8+304]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+296], rax
        sbb rcx, rdx
        ; word 39
        mov rax, QWORD PTR [r10+312]
        mov rdx, QWORD PTR [r8+312]
        pext rax, rax, rbp
        mov QWORD PTR [r9+304], rcx
        sbb rdx, rax
        ; word 40
        mov rcx, QWORD PTR [r10+320]
        mov rax, QWORD PTR [r8+320]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+312], rdx
        sbb rax, rcx
        ; word 41
        mov rdx, QWORD PTR [r10+328]
        mov rcx, QWORD PTR [r8+328]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+320], rax
        sbb rcx, rdx
        ; word 42
        mov rax, QWORD PTR [r10+336]
        mov rdx, QWORD PTR [r8+336]
        pext rax, rax, rbp
        mov QWORD PTR [r9+328], rcx
        sbb rdx, rax
        ; word 43
        mov rcx, QWORD PTR [r10+344]
        mov rax, QWORD PTR [r8+344]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+336], rdx
        sbb rax, rcx
        ; word 44
        mov rdx, QWORD PTR [r10+352]
        mov rcx, QWORD PTR [r8+352]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+344], rax
        sbb rcx, rdx
        ; word 45
        mov rax, QWORD PTR [r10+360]
        mov rdx, QWORD PTR [r8+360]
        pext rax, rax, rbp
        mov QWORD PTR [r9+352], rcx
        sbb rdx, rax
        ; word 46
        mov rcx, QWORD PTR [r10+368]
        mov rax, QWORD PTR [r8+368]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+360], rdx
        sbb rax, rcx
        ; word 47
        mov rdx, QWORD PTR [r10+376]
        mov rcx, QWORD PTR [r8+376]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+368], rax
        sbb rcx, rdx
        ; word 48
        mov rax, QWORD PTR [r10+384]
        mov rdx, QWORD PTR [r8+384]
        pext rax, rax, rbp
        mov QWORD PTR [r9+376], rcx
        sbb rdx, rax
        ; word 49
        mov rcx, QWORD PTR [r10+392]
        mov rax, QWORD PTR [r8+392]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+384], rdx
        sbb rax, rcx
        ; word 50
        mov rdx, QWORD PTR [r10+400]
        mov rcx, QWORD PTR [r8+400]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+392], rax
        sbb rcx, rdx
        ; word 51
        mov rax, QWORD PTR [r10+408]
        mov rdx, QWORD PTR [r8+408]
        pext rax, rax, rbp
        mov QWORD PTR [r9+400], rcx
        sbb rdx, rax
        ; word 52
        mov rcx, QWORD PTR [r10+416]
        mov rax, QWORD PTR [r8+416]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+408], rdx
        sbb rax, rcx
        ; word 53
        mov rdx, QWORD PTR [r10+424]
        mov rcx, QWORD PTR [r8+424]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+416], rax
        sbb rcx, rdx
        ; word 54
        mov rax, QWORD PTR [r10+432]
        mov rdx, QWORD PTR [r8+432]
        pext rax, rax, rbp
        mov QWORD PTR [r9+424], rcx
        sbb rdx, rax
        ; word 55
        mov rcx, QWORD PTR [r10+440]
        mov rax, QWORD PTR [r8+440]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+432], rdx
        sbb rax, rcx
        ; word 56
        mov rdx, QWORD PTR [r10+448]
        mov rcx, QWORD PTR [r8+448]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+440], rax
        sbb rcx, rdx
        ; word 57
        mov rax, QWORD PTR [r10+456]
        mov rdx, QWORD PTR [r8+456]
        pext rax, rax, rbp
        mov QWORD PTR [r9+448], rcx
        sbb rdx, rax
        ; word 58
        mov rcx, QWORD PTR [r10+464]
        mov rax, QWORD PTR [r8+464]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+456], rdx
        sbb rax, rcx
        ; word 59
        mov rdx, QWORD PTR [r10+472]
        mov rcx, QWORD PTR [r8+472]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+464], rax
        sbb rcx, rdx
        ; word 60
        mov rax, QWORD PTR [r10+480]
        mov rdx, QWORD PTR [r8+480]
        pext rax, rax, rbp
        mov QWORD PTR [r9+472], rcx
        sbb rdx, rax
        ; word 61
        mov rcx, QWORD PTR [r10+488]
        mov rax, QWORD PTR [r8+488]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+480], rdx
        sbb rax, rcx
        ; word 62
        mov rdx, QWORD PTR [r10+496]
        mov rcx, QWORD PTR [r8+496]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+488], rax
        sbb rcx, rdx
        ; word 63
        mov rax, QWORD PTR [r10+504]
        mov rdx, QWORD PTR [r8+504]
        pext rax, rax, rbp
        mov QWORD PTR [r9+496], rcx
        sbb rdx, rax
        ; Store the last result word; the final borrow is not used.
        mov QWORD PTR [r9+504], rdx
        ; Restore saved registers in reverse push order.
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_4096_mont_reduce_avx2_64 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
_text SEGMENT READONLY PARA sp_4096_get_from_table_avx2_64 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 vmovdqu OWORD PTR [rsp+16], xmm7 vmovdqu OWORD PTR [rsp+32], xmm8 vmovdqu OWORD PTR [rsp+48], xmm9 vmovdqu OWORD PTR [rsp+64], xmm10 vmovdqu OWORD PTR [rsp+80], xmm11 vmovdqu OWORD PTR [rsp+96], xmm12 vmovdqu OWORD PTR [rsp+112], xmm13 mov rax, 1 movd xmm10, r8 movd xmm11, rax vpxor ymm13, ymm13, ymm13 vpermd ymm10, ymm13, ymm10 vpermd ymm11, ymm13, ymm11 ; START: 0-15 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor
ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, 
ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, 
YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, 
YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 0-15 ; START: 16-31 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR 
[r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, 
QWORD PTR [rdx+64] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, 
ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 128 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 16-31 ; START: 32-47 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, 
ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR 
[r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu 
ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, 
ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 256 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 add rcx, 128 ; END: 32-47 ; START: 48-63 vpxor ymm13, ymm13, ymm13 vpxor ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 vpxor ymm6, ymm6, ymm6 vpxor ymm7, ymm7, ymm7 ; ENTRY: 0 mov r9, QWORD PTR [rdx] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 1 mov r9, QWORD PTR [rdx+8] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 2 mov r9, QWORD PTR [rdx+16] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 
vpaddd ymm13, ymm13, ymm11 ; ENTRY: 3 mov r9, QWORD PTR [rdx+24] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 4 mov r9, QWORD PTR [rdx+32] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 5 mov r9, QWORD PTR [rdx+40] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 6 mov r9, QWORD PTR [rdx+48] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 7 mov r9, QWORD PTR [rdx+56] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, 
ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 8 mov r9, QWORD PTR [rdx+64] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 9 mov r9, QWORD PTR [rdx+72] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 10 mov r9, QWORD PTR [rdx+80] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 11 mov r9, QWORD PTR [rdx+88] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 12 mov r9, QWORD PTR [rdx+96] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, 
YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 13 mov r9, QWORD PTR [rdx+104] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 14 mov r9, QWORD PTR [rdx+112] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 ; ENTRY: 15 mov r9, QWORD PTR [rdx+120] add r9, 384 vpcmpeqd ymm12, ymm13, ymm10 vmovdqu ymm0, YMMWORD PTR [r9] vmovdqu ymm1, YMMWORD PTR [r9+32] vmovdqu ymm2, YMMWORD PTR [r9+64] vmovdqu ymm3, YMMWORD PTR [r9+96] vpand ymm0, ymm0, ymm12 vpand ymm1, ymm1, ymm12 vpand ymm2, ymm2, ymm12 vpand ymm3, ymm3, ymm12 vpor ymm4, ymm4, ymm0 vpor ymm5, ymm5, ymm1 vpor ymm6, ymm6, ymm2 vpor ymm7, ymm7, ymm3 vpaddd ymm13, ymm13, ymm11 vmovdqu YMMWORD PTR [rcx], ymm4 vmovdqu YMMWORD PTR [rcx+32], ymm5 vmovdqu YMMWORD PTR [rcx+64], ymm6 vmovdqu YMMWORD PTR [rcx+96], ymm7 ; END: 48-63 vmovdqu xmm6, OWORD PTR [rsp] vmovdqu xmm7, OWORD PTR [rsp+16] vmovdqu xmm8, OWORD PTR [rsp+32] vmovdqu xmm9, OWORD PTR [rsp+48] vmovdqu xmm10, OWORD PTR [rsp+64] vmovdqu xmm11, OWORD PTR [rsp+80] vmovdqu xmm12, OWORD PTR [rsp+96] vmovdqu xmm13, OWORD PTR [rsp+112] add rsp, 128 ret 
sp_4096_get_from_table_avx2_64 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m.
;  * m is -1 to add and 0 when not.
;  *
;  * r  A single precision number representing conditional add result.
;  * a  A single precision number to add with.
;  * b  A single precision number to add.
;  * m  Mask value to apply.
;  *
;  * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m.
;  * Operates on 32 64-bit words (256 bytes).
;  * rax is returned holding the carry out of the top word (0 or 1).
;  * 256 bytes of stack hold the masked copy of b.
;  */
_text SEGMENT READONLY PARA
sp_4096_cond_add_32 PROC
        ; Scratch area for (b AND m), one slot per word.
        sub rsp, 256
        mov rax, 0
        ; Pass 1: mask every word of b with m and park it on the stack.
        ; 'and' rewrites the flags, so the masking is done up front rather
        ; than inside the carry chain of pass 2.
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp], r10
        mov QWORD PTR [rsp+8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+16], r10
        mov QWORD PTR [rsp+24], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+32], r10
        mov QWORD PTR [rsp+40], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+48], r10
        mov QWORD PTR [rsp+56], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+64], r10
        mov QWORD PTR [rsp+72], r11
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+80], r10
        mov QWORD PTR [rsp+88], r11
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+96], r10
        mov QWORD PTR [rsp+104], r11
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+112], r10
        mov QWORD PTR [rsp+120], r11
        mov r10, QWORD PTR [r8+128]
        mov r11, QWORD PTR [r8+136]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+128], r10
        mov QWORD PTR [rsp+136], r11
        mov r10, QWORD PTR [r8+144]
        mov r11, QWORD PTR [r8+152]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+144], r10
        mov QWORD PTR [rsp+152], r11
        mov r10, QWORD PTR [r8+160]
        mov r11, QWORD PTR [r8+168]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+160], r10
        mov QWORD PTR [rsp+168], r11
        mov r10, QWORD PTR [r8+176]
        mov r11, QWORD PTR [r8+184]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+176], r10
        mov QWORD PTR [rsp+184], r11
        mov r10, QWORD PTR [r8+192]
        mov r11, QWORD PTR [r8+200]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+192], r10
        mov QWORD PTR [rsp+200], r11
        mov r10, QWORD PTR [r8+208]
        mov r11, QWORD PTR [r8+216]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+208], r10
        mov QWORD PTR [rsp+216], r11
        mov r10, QWORD PTR [r8+224]
        mov r11, QWORD PTR [r8+232]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+224], r10
        mov QWORD PTR [rsp+232], r11
        mov r10, QWORD PTR [r8+240]
        mov r11, QWORD PTR [r8+248]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+240], r10
        mov QWORD PTR [rsp+248], r11
        ; Pass 2: r = a + masked b. r8 (the b pointer) is no longer needed
        ; and is reused as scratch. Only mov sits between the add/adc
        ; instructions, so CF carries from word to word; r10 and r11
        ; alternate so each store is issued one step after its adc.
        mov r10, QWORD PTR [rdx]
        mov r8, QWORD PTR [rsp]
        add r10, r8
        mov r11, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rsp+8]
        adc r11, r8
        mov QWORD PTR [rcx], r10
        mov r10, QWORD PTR [rdx+16]
        mov r8, QWORD PTR [rsp+16]
        adc r10, r8
        mov QWORD PTR [rcx+8], r11
        mov r11, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [rsp+24]
        adc r11, r8
        mov QWORD PTR [rcx+16], r10
        mov r10, QWORD PTR [rdx+32]
        mov r8, QWORD PTR [rsp+32]
        adc r10, r8
        mov QWORD PTR [rcx+24], r11
        mov r11, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [rsp+40]
        adc r11, r8
        mov QWORD PTR [rcx+32], r10
        mov r10, QWORD PTR [rdx+48]
        mov r8, QWORD PTR [rsp+48]
        adc r10, r8
        mov QWORD PTR [rcx+40], r11
        mov r11, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [rsp+56]
        adc r11, r8
        mov QWORD PTR [rcx+48], r10
        mov r10, QWORD PTR [rdx+64]
        mov r8, QWORD PTR [rsp+64]
        adc r10, r8
        mov QWORD PTR [rcx+56], r11
        mov r11, QWORD PTR [rdx+72]
        mov r8, QWORD PTR [rsp+72]
        adc r11, r8
        mov QWORD PTR [rcx+64], r10
        mov r10, QWORD PTR [rdx+80]
        mov r8, QWORD PTR [rsp+80]
        adc r10, r8
        mov QWORD PTR [rcx+72], r11
        mov r11, QWORD PTR [rdx+88]
        mov r8, QWORD PTR [rsp+88]
        adc r11, r8
        mov QWORD PTR [rcx+80], r10
        mov r10, QWORD PTR [rdx+96]
        mov r8, QWORD PTR [rsp+96]
        adc r10, r8
        mov QWORD PTR [rcx+88], r11
        mov r11, QWORD PTR [rdx+104]
        mov r8, QWORD PTR [rsp+104]
        adc r11, r8
        mov QWORD PTR [rcx+96], r10
        mov r10, QWORD PTR [rdx+112]
        mov r8, QWORD PTR [rsp+112]
        adc r10, r8
        mov QWORD PTR [rcx+104], r11
        mov r11, QWORD PTR [rdx+120]
        mov r8, QWORD PTR [rsp+120]
        adc r11, r8
        mov QWORD PTR [rcx+112], r10
        mov r10, QWORD PTR [rdx+128]
        mov r8, QWORD PTR [rsp+128]
        adc r10, r8
        mov QWORD PTR [rcx+120], r11
        mov r11, QWORD PTR [rdx+136]
        mov r8, QWORD PTR [rsp+136]
        adc r11, r8
        mov QWORD PTR [rcx+128], r10
        mov r10, QWORD PTR [rdx+144]
        mov r8, QWORD PTR [rsp+144]
        adc r10, r8
        mov QWORD PTR [rcx+136], r11
        mov r11, QWORD PTR [rdx+152]
        mov r8, QWORD PTR [rsp+152]
        adc r11, r8
        mov QWORD PTR [rcx+144], r10
        mov r10, QWORD PTR [rdx+160]
        mov r8, QWORD PTR [rsp+160]
        adc r10, r8
        mov QWORD PTR [rcx+152], r11
        mov r11, QWORD PTR [rdx+168]
        mov r8, QWORD PTR [rsp+168]
        adc r11, r8
        mov QWORD PTR [rcx+160], r10
        mov r10, QWORD PTR [rdx+176]
        mov r8, QWORD PTR [rsp+176]
        adc r10, r8
        mov QWORD PTR [rcx+168], r11
        mov r11, QWORD PTR [rdx+184]
        mov r8, QWORD PTR [rsp+184]
        adc r11, r8
        mov QWORD PTR [rcx+176], r10
        mov r10, QWORD PTR [rdx+192]
        mov r8, QWORD PTR [rsp+192]
        adc r10, r8
        mov QWORD PTR [rcx+184], r11
        mov r11, QWORD PTR [rdx+200]
        mov r8, QWORD PTR [rsp+200]
        adc r11, r8
        mov QWORD PTR [rcx+192], r10
        mov r10, QWORD PTR [rdx+208]
        mov r8, QWORD PTR [rsp+208]
        adc r10, r8
        mov QWORD PTR [rcx+200], r11
        mov r11, QWORD PTR [rdx+216]
        mov r8, QWORD PTR [rsp+216]
        adc r11, r8
        mov QWORD PTR [rcx+208], r10
        mov r10, QWORD PTR [rdx+224]
        mov r8, QWORD PTR [rsp+224]
        adc r10, r8
        mov QWORD PTR [rcx+216], r11
        mov r11, QWORD PTR [rdx+232]
        mov r8, QWORD PTR [rsp+232]
        adc r11, r8
        mov QWORD PTR [rcx+224], r10
        mov r10, QWORD PTR [rdx+240]
        mov r8, QWORD PTR [rsp+240]
        adc r10, r8
        mov QWORD PTR [rcx+232], r11
        mov r11, QWORD PTR [rdx+248]
        mov r8, QWORD PTR [rsp+248]
        adc r11, r8
        mov QWORD PTR [rcx+240], r10
        mov QWORD PTR [rcx+248], r11
        ; Capture the final carry before 'add rsp' overwrites the flags.
        adc rax, 0
        add rsp, 256
        ret
sp_4096_cond_add_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
;  * m is -1 to add and 0 when not.
;  *
;  * r  A single precision number representing conditional add result.
;  * a  A single precision number to add with.
;  * b  A single precision number to add.
;  * m  Mask value to apply.
;  *
;  * Registers as used below: rcx = r, rdx = a, r8 = b, r9 = m.
;  * Operates on 32 64-bit words (256 bytes).
;  * rax is returned holding the carry out of the top word (0 or 1).
;  * Each word of b is masked with pext: with m = -1 the word comes back
;  * unchanged, with m = 0 the result is 0. pext leaves the flags alone,
;  * so the masking can sit inside the add/adc carry chain and no stack
;  * copy of b is required. For any other value of m pext is not a plain
;  * AND, so m must be 0 or -1 as stated above.
;  */
_text SEGMENT READONLY PARA
sp_4096_cond_add_avx2_32 PROC
        ; r12 is used as a third rotating temporary and is restored on exit.
        push r12
        mov rax, 0
        ; Word 0 starts the carry chain with 'add'; every later word uses
        ; 'adc'. r10/r11/r12 rotate so that the store of word i-1 is issued
        ; between the pext and the adc of word i.
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx]
        pext r12, r12, r9
        add r10, r12
        mov r12, QWORD PTR [r8+8]
        mov r11, QWORD PTR [rdx+8]
        pext r12, r12, r9
        mov QWORD PTR [rcx], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+16]
        mov r12, QWORD PTR [rdx+16]
        pext r10, r10, r9
        mov QWORD PTR [rcx+8], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+24]
        pext r11, r11, r9
        mov QWORD PTR [rcx+16], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+32]
        mov r11, QWORD PTR [rdx+32]
        pext r12, r12, r9
        mov QWORD PTR [rcx+24], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+40]
        mov r12, QWORD PTR [rdx+40]
        pext r10, r10, r9
        mov QWORD PTR [rcx+32], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+48]
        mov r10, QWORD PTR [rdx+48]
        pext r11, r11, r9
        mov QWORD PTR [rcx+40], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+56]
        mov r11, QWORD PTR [rdx+56]
        pext r12, r12, r9
        mov QWORD PTR [rcx+48], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+64]
        mov r12, QWORD PTR [rdx+64]
        pext r10, r10, r9
        mov QWORD PTR [rcx+56], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+72]
        mov r10, QWORD PTR [rdx+72]
        pext r11, r11, r9
        mov QWORD PTR [rcx+64], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+80]
        mov r11, QWORD PTR [rdx+80]
        pext r12, r12, r9
        mov QWORD PTR [rcx+72], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+88]
        mov r12, QWORD PTR [rdx+88]
        pext r10, r10, r9
        mov QWORD PTR [rcx+80], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+96]
        mov r10, QWORD PTR [rdx+96]
        pext r11, r11, r9
        mov QWORD PTR [rcx+88], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+104]
        mov r11, QWORD PTR [rdx+104]
        pext r12, r12, r9
        mov QWORD PTR [rcx+96], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+112]
        mov r12, QWORD PTR [rdx+112]
        pext r10, r10, r9
        mov QWORD PTR [rcx+104], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+120]
        mov r10, QWORD PTR [rdx+120]
        pext r11, r11, r9
        mov QWORD PTR [rcx+112], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+128]
        mov r11, QWORD PTR [rdx+128]
        pext r12, r12, r9
        mov QWORD PTR [rcx+120], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+136]
        mov r12, QWORD PTR [rdx+136]
        pext r10, r10, r9
        mov QWORD PTR [rcx+128], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+144]
        mov r10, QWORD PTR [rdx+144]
        pext r11, r11, r9
        mov QWORD PTR [rcx+136], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+152]
        mov r11, QWORD PTR [rdx+152]
        pext r12, r12, r9
        mov QWORD PTR [rcx+144], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+160]
        mov r12, QWORD PTR [rdx+160]
        pext r10, r10, r9
        mov QWORD PTR [rcx+152], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+168]
        mov r10, QWORD PTR [rdx+168]
        pext r11, r11, r9
        mov QWORD PTR [rcx+160], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+176]
        mov r11, QWORD PTR [rdx+176]
        pext r12, r12, r9
        mov QWORD PTR [rcx+168], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+184]
        mov r12, QWORD PTR [rdx+184]
        pext r10, r10, r9
        mov QWORD PTR [rcx+176], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+192]
        mov r10, QWORD PTR [rdx+192]
        pext r11, r11, r9
        mov QWORD PTR [rcx+184], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+200]
        mov r11, QWORD PTR [rdx+200]
        pext r12, r12, r9
        mov QWORD PTR [rcx+192], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+208]
        mov r12, QWORD PTR [rdx+208]
        pext r10, r10, r9
        mov QWORD PTR [rcx+200], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+216]
        mov r10, QWORD PTR [rdx+216]
        pext r11, r11, r9
        mov QWORD PTR [rcx+208], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+224]
        mov r11, QWORD PTR [rdx+224]
        pext r12, r12, r9
        mov QWORD PTR [rcx+216], r10
        adc r11, r12
        mov r10, QWORD PTR [r8+232]
        mov r12, QWORD PTR [rdx+232]
        pext r10, r10, r9
        mov QWORD PTR [rcx+224], r11
        adc r12, r10
        mov r11, QWORD PTR [r8+240]
        mov r10, QWORD PTR [rdx+240]
        pext r11, r11, r9
        mov QWORD PTR [rcx+232], r12
        adc r10, r11
        mov r12, QWORD PTR [r8+248]
        mov r11, QWORD PTR [rdx+248]
        pext r12, r12, r9
        mov QWORD PTR [rcx+240], r10
        adc r11, r12
        mov QWORD PTR [rcx+248], r11
        ; Fold the final carry into the return value.
        adc rax, 0
        pop r12
        ret
sp_4096_cond_add_avx2_32 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bit. (r = a << n)
;  *
;  * r  Result of left shift by n.
;  * a  Number to shift.
;  * n  Amount to shift.
;  *
;  * Registers as used below: rcx = r, rdx = a, r8b = n.
;  * Reads 64 words of a and writes 65 words of r: the word at [r+512]
;  * receives the bits shifted out of the top word of a.
;  * rax holds the r pointer on return.
;  * The count is passed to shld/shl in cl; for 64-bit operands the CPU
;  * uses only the low 6 bits of cl.
;  */
_text SEGMENT READONLY PARA
sp_4096_lshift_64 PROC
        push r12
        push r13
        ; cl is needed for the shift count, so the r pointer moves to rax.
        mov rax, rcx
        mov cl, r8b
        ; Top group: words 59..63 of a produce words 60..64 of r.
        ; r12 starts at 0 and collects the overflow bits of word 63.
        mov r12, 0
        mov r13, QWORD PTR [rdx+472]
        mov r8, QWORD PTR [rdx+480]
        mov r9, QWORD PTR [rdx+488]
        mov r10, QWORD PTR [rdx+496]
        mov r11, QWORD PTR [rdx+504]
        shld r12, r11, cl
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+480], r8
        mov QWORD PTR [rax+488], r9
        mov QWORD PTR [rax+496], r10
        mov QWORD PTR [rax+504], r11
        mov QWORD PTR [rax+512], r12
        ; Each following group handles four words, working downwards.
        ; The lowest word loaded by the previous group (alternately r13
        ; and r11) is still unshifted; it is completed here with the bits
        ; from the word below it and stored as the top word of this group.
        mov r11, QWORD PTR [rdx+440]
        mov r8, QWORD PTR [rdx+448]
        mov r9, QWORD PTR [rdx+456]
        mov r10, QWORD PTR [rdx+464]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+448], r8
        mov QWORD PTR [rax+456], r9
        mov QWORD PTR [rax+464], r10
        mov QWORD PTR [rax+472], r13
        mov r13, QWORD PTR [rdx+408]
        mov r8, QWORD PTR [rdx+416]
        mov r9, QWORD PTR [rdx+424]
        mov r10, QWORD PTR [rdx+432]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+416], r8
        mov QWORD PTR [rax+424], r9
        mov QWORD PTR [rax+432], r10
        mov QWORD PTR [rax+440], r11
        mov r11, QWORD PTR [rdx+376]
        mov r8, QWORD PTR [rdx+384]
        mov r9, QWORD PTR [rdx+392]
        mov r10, QWORD PTR [rdx+400]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+384], r8
        mov QWORD PTR [rax+392], r9
        mov QWORD PTR [rax+400], r10
        mov QWORD PTR [rax+408], r13
        mov r13, QWORD PTR [rdx+344]
        mov r8, QWORD PTR [rdx+352]
        mov r9, QWORD PTR [rdx+360]
        mov r10, QWORD PTR [rdx+368]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+352], r8
        mov QWORD PTR [rax+360], r9
        mov QWORD PTR [rax+368], r10
        mov QWORD PTR [rax+376], r11
        mov r11, QWORD PTR [rdx+312]
        mov r8, QWORD PTR [rdx+320]
        mov r9, QWORD PTR [rdx+328]
        mov r10, QWORD PTR [rdx+336]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+320], r8
        mov QWORD PTR [rax+328], r9
        mov QWORD PTR [rax+336], r10
        mov QWORD PTR [rax+344], r13
        mov r13, QWORD PTR [rdx+280]
        mov r8, QWORD PTR [rdx+288]
        mov r9, QWORD PTR [rdx+296]
        mov r10, QWORD PTR [rdx+304]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+288], r8
        mov QWORD PTR [rax+296], r9
        mov QWORD PTR [rax+304], r10
        mov QWORD PTR [rax+312], r11
        mov r11, QWORD PTR [rdx+248]
        mov r8, QWORD PTR [rdx+256]
        mov r9, QWORD PTR [rdx+264]
        mov r10, QWORD PTR [rdx+272]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+256], r8
        mov QWORD PTR [rax+264], r9
        mov QWORD PTR [rax+272], r10
        mov QWORD PTR [rax+280], r13
        mov r13, QWORD PTR [rdx+216]
        mov r8, QWORD PTR [rdx+224]
        mov r9, QWORD PTR [rdx+232]
        mov r10, QWORD PTR [rdx+240]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+224], r8
        mov QWORD PTR [rax+232], r9
        mov QWORD PTR [rax+240], r10
        mov QWORD PTR [rax+248], r11
        mov r11, QWORD PTR [rdx+184]
        mov r8, QWORD PTR [rdx+192]
        mov r9, QWORD PTR [rdx+200]
        mov r10, QWORD PTR [rdx+208]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+192], r8
        mov QWORD PTR [rax+200], r9
        mov QWORD PTR [rax+208], r10
        mov QWORD PTR [rax+216], r13
        mov r13, QWORD PTR [rdx+152]
        mov r8, QWORD PTR [rdx+160]
        mov r9, QWORD PTR [rdx+168]
        mov r10, QWORD PTR [rdx+176]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+160], r8
        mov QWORD PTR [rax+168], r9
        mov QWORD PTR [rax+176], r10
        mov QWORD PTR [rax+184], r11
        mov r11, QWORD PTR [rdx+120]
        mov r8, QWORD PTR [rdx+128]
        mov r9, QWORD PTR [rdx+136]
        mov r10, QWORD PTR [rdx+144]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+128], r8
        mov QWORD PTR [rax+136], r9
        mov QWORD PTR [rax+144], r10
        mov QWORD PTR [rax+152], r13
        mov r13, QWORD PTR [rdx+88]
        mov r8, QWORD PTR [rdx+96]
        mov r9, QWORD PTR [rdx+104]
        mov r10, QWORD PTR [rdx+112]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+96], r8
        mov QWORD PTR [rax+104], r9
        mov QWORD PTR [rax+112], r10
        mov QWORD PTR [rax+120], r11
        mov r11, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [rdx+64]
        mov r9, QWORD PTR [rdx+72]
        mov r10, QWORD PTR [rdx+80]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+64], r8
        mov QWORD PTR [rax+72], r9
        mov QWORD PTR [rax+80], r10
        mov QWORD PTR [rax+88], r13
        mov r13, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [rdx+48]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+32], r8
        mov QWORD PTR [rax+40], r9
        mov QWORD PTR [rax+48], r10
        mov QWORD PTR [rax+56], r11
        ; Bottom group: words 0..2 plus the pending word 3 in r13.
        ; Word 0 has nothing below it, so a plain shl fills it with zeros.
        mov r8, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shl r8, cl
        mov QWORD PTR [rax], r8
        mov QWORD PTR [rax+8], r9
        mov QWORD PTR [rax+16], r10
        mov QWORD PTR [rax+24], r13
        pop r13
        pop r12
        ret
sp_4096_lshift_64 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_256
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
; sp_256_mul_4 register use: rcx = r, rdx = a (moved to r9 because mul
; writes rdx), r8 = b. The 4x4 word product is accumulated column by
; column in the rotating triple r10/r11/r12. The low four result words
; are staged on the stack and copied to r last; the high four are stored
; straight to r (presumably so that r may alias an input - confirm
; against callers).
_text SEGMENT READONLY PARA
sp_256_mul_4 PROC
        push r12
        mov r9, rdx
        sub rsp, 32
        ; A[0] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9]
        xor r12, r12
        mov QWORD PTR [rsp], rax
        mov r11, rdx
        ; A[0] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[1] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+8]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rsp+24], r10
        ; A[1] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+8]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[2] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+16]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rcx+32], r11
        ; A[2] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+16]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[3] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+24]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+40], r12
        ; A[3] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        ; Copy the staged low half from the stack into r.
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r10, QWORD PTR [rsp+16]
        mov r11, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        add rsp, 32
        pop r12
        ret
sp_256_mul_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
;  *
;  * r  Result of multiplication.
;  * a  First number to multiply.
;  * b  Second number to multiply.
;  *
;  * Register use: rcx = r, rdx = a (moved to rax, since mulx takes one
;  * factor implicitly from rdx), r8 = b (moved to rbp).
;  * The eight result words are built in r8..r15 and stored at the end.
;  * Two independent carry chains are kept live: adcx uses CF, adox uses OF.
;  */
_text SEGMENT READONLY PARA
sp_256_mul_avx2_4 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov rbp, r8
        mov rax, rdx
        mov rdx, QWORD PTR [rax]
        ; The word at [rbp+8] is cached in r14 for the first three rows;
        ; r14 is overwritten as a result word in the third row, so the
        ; fourth row reads that word from memory again.
        mov r14, QWORD PTR [rbp+8]
        ; A[0] * B[0]
        mulx r9, r8, QWORD PTR [rbp]
        ; Zero rbx and clear both CF and OF before each row.
        xor rbx, rbx
        ; A[0] * B[1]
        mulx r10, rdi, r14
        adcx r9, rdi
        ; A[0] * B[2]
        mulx r11, rdi, QWORD PTR [rbp+16]
        adcx r10, rdi
        ; A[0] * B[3]
        mulx r12, rdi, QWORD PTR [rbp+24]
        adcx r11, rdi
        mov rdx, QWORD PTR [rax+8]
        adcx r12, rbx
        ; A[1] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r9, rdi
        ; A[1] * B[1]
        mulx r15, rdi, r14
        adox r10, rsi
        adcx r10, rdi
        ; A[1] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r11, r15
        adcx r11, rdi
        ; A[1] * B[3]
        mulx r13, rdi, QWORD PTR [rbp+24]
        adox r12, rsi
        adcx r12, rdi
        adox r13, rbx
        mov rdx, QWORD PTR [rax+16]
        adcx r13, rbx
        ; A[2] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r10, rdi
        ; A[2] * B[1]
        mulx r15, rdi, r14
        adox r11, rsi
        adcx r11, rdi
        ; A[2] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r12, r15
        adcx r12, rdi
        ; A[2] * B[3]
        mulx r14, rdi, QWORD PTR [rbp+24]
        adox r13, rsi
        adcx r13, rdi
        adox r14, rbx
        mov rdx, QWORD PTR [rax+24]
        adcx r14, rbx
        ; A[3] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r11, rdi
        ; A[3] * B[1]
        mulx r15, rdi, QWORD PTR [rbp+8]
        adox r12, rsi
        adcx r12, rdi
        ; A[3] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r13, r15
        adcx r13, rdi
        ; A[3] * B[3]
        mulx r15, rdi, QWORD PTR [rbp+24]
        adox r14, rsi
        adcx r14, rdi
        adox r15, rbx
        adcx r15, rbx
        mov QWORD PTR [rcx], r8
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r13
        mov QWORD PTR [rcx+48], r14
        mov QWORD PTR [rcx+56], r15
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_256_mul_avx2_4 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  *
;  * Register use: rcx = r, rdx = a (moved to r8 because mul writes rdx).
;  * Columns are accumulated in the rotating triple r9/r10/r11; each
;  * off-diagonal product is added twice. The low four result words are
;  * staged on the stack and copied to r last.
;  * Only volatile registers are used, so nothing is saved or restored:
;  * the final copy goes through rax/rdx/r10/r11, which are all dead by
;  * then.
;  */
_text SEGMENT READONLY PARA
sp_256_sqr_4 PROC
        mov r8, rdx
        sub rsp, 32
        ; A[0] * A[0]
        mov rax, QWORD PTR [r8]
        mul rax
        xor r11, r11
        mov QWORD PTR [rsp], rax
        mov r10, rdx
        ; A[0] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+8], r10
        ; A[0] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[1] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rsp+16], r11
        ; A[0] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8+8]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+24], r9
        ; A[1] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[2] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul rax
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rcx+32], r10
        ; A[2] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+16]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rcx+40], r11
        ; A[3] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul rax
        add r9, rax
        adc r10, rdx
        mov QWORD PTR [rcx+48], r9
        mov QWORD PTR [rcx+56], r10
        ; Copy the staged low half from the stack into r.
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r10, QWORD PTR [rsp+16]
        mov r11, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        add rsp, 32
        ret
sp_256_sqr_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  Result of squaring.
;  * a  Number to square in Montgomery form.
;  *
;  * Register use: rcx = r, rdx = a (moved to rax, since mulx takes one
;  * factor implicitly from rdx).
;  * Step 1 sums the six off-diagonal products into r9..r14.
;  * Step 2 doubles that sum on the CF chain (adcx x, x) while adding the
;  * four diagonal squares on the OF chain (adox).
;  * The eight result words end up in r8..r15 and are stored at the end.
;  */
_text SEGMENT READONLY PARA
sp_256_sqr_avx2_4 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov rax, rdx
        ; Clears CF and OF for the chains below; r8 itself is overwritten
        ; by the next mulx.
        xor r8, r8
        mov rdx, QWORD PTR [rax]
        mov rsi, QWORD PTR [rax+8]
        mov rbx, QWORD PTR [rax+16]
        mov r15, QWORD PTR [rax+24]
        ; A[0] * A[1]
        mulx r10, r9, rsi
        ; A[0] * A[2]
        mulx r11, r8, rbx
        adox r10, r8
        ; A[0] * A[3]
        mulx r12, r8, r15
        mov rdx, rsi
        adox r11, r8
        ; A[1] * A[2]
        mulx rdi, r8, rbx
        mov rdx, r15
        adcx r11, r8
        ; A[1] * A[3]
        mulx r13, r8, rsi
        ; mov, not xor: both carry chains are live here and must survive.
        mov r15, 0
        adox r12, rdi
        adcx r12, r8
        ; A[2] * A[3]
        mulx r14, r8, rbx
        adox r13, r15
        adcx r13, r8
        adox r14, r15
        adcx r14, r15
        ; Double with Carry Flag
        xor r15, r15
        ; A[0] * A[0]
        mov rdx, QWORD PTR [rax]
        mulx rdi, r8, rdx
        adcx r9, r9
        adcx r10, r10
        adox r9, rdi
        ; A[1] * A[1]
        mov rdx, QWORD PTR [rax+8]
        mulx rbx, rsi, rdx
        adcx r11, r11
        adox r10, rsi
        ; A[2] * A[2]
        mov rdx, QWORD PTR [rax+16]
        mulx rsi, rdi, rdx
        adcx r12, r12
        adox r11, rbx
        adcx r13, r13
        adox r12, rdi
        adcx r14, r14
        ; A[3] * A[3]
        mov rdx, QWORD PTR [rax+24]
        mulx rbx, rdi, rdx
        adox r13, rsi
        adcx r15, r15
        adox r14, rdi
        adox r15, rbx
        mov QWORD PTR [rcx], r8
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r13
        mov QWORD PTR [rcx+48], r14
        mov QWORD PTR [rcx+56], r15
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_256_sqr_avx2_4 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
; sp_256_add_4 register use: rcx = r, rdx = a, r8 = b.
; rax is returned holding the carry out of the top word (0 or 1).
_text SEGMENT READONLY PARA
sp_256_add_4 PROC
        push r12
        xor eax, eax
        ; Addition is commutative: pull in b first, then fold a on top.
        mov r12, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+8]
        mov r9, QWORD PTR [r8]
        add r9, QWORD PTR [rdx]
        adc r10, QWORD PTR [rdx+8]
        adc r11, QWORD PTR [rdx+16]
        adc r12, QWORD PTR [rdx+24]
        ; mov does not disturb CF, so the carry is still pending after
        ; the stores.
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx], r9
        adc rax, 0
        pop r12
        ret
sp_256_add_4 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  *
;  * Register use: rcx = r, rdx = a, r8 = b.
;  * rax is returned as 0 when there is no borrow and -1 when there is.
;  */
_text SEGMENT READONLY PARA
sp_256_sub_4 PROC
        push r12
        xor eax, eax
        mov r12, QWORD PTR [rdx+24]
        mov r11, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx]
        sub r9, QWORD PTR [r8]
        sbb r10, QWORD PTR [r8+8]
        sbb r11, QWORD PTR [r8+16]
        sbb r12, QWORD PTR [r8+24]
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx], r9
        ; rax = rax - rax - CF: spreads the final borrow over all 64 bits.
        sbb rax, rax
        pop r12
        ret
sp_256_sub_4 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
;  * m is -1 to copy and 0 when not.
;  *
;  * r  A single precision number to copy over.
;  * a  A single precision number to copy.
;  * m  Mask value to apply.
;  *
;  * Register use: rcx = r, rdx = a, r8 = m.
;  * Computes r ^= (r ^ a) & m for each of the four words, with no branch
;  * on m.
;  */
_text SEGMENT READONLY PARA
sp_256_cond_copy_4 PROC
        ; difference = a ^ r (same value as r ^ a)
        mov r11, QWORD PTR [rdx+24]
        mov r10, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rdx+8]
        mov rax, QWORD PTR [rdx]
        xor r11, QWORD PTR [rcx+24]
        xor r10, QWORD PTR [rcx+16]
        xor r9, QWORD PTR [rcx+8]
        xor rax, QWORD PTR [rcx]
        ; keep the difference only where the mask is set
        and r11, r8
        and r10, r8
        and r9, r8
        and rax, r8
        ; applying it turns r into a (m = -1) or leaves r alone (m = 0)
        xor QWORD PTR [rcx], rax
        xor QWORD PTR [rcx+8], r9
        xor QWORD PTR [rcx+16], r10
        xor QWORD PTR [rcx+24], r11
        ret
sp_256_cond_copy_4 ENDP
_text ENDS
; /* Multiply two Montgomery form numbers mod the modulus (prime).
;  * (r = a * b mod m)
;  *
;  * r   Result of multiplication.
;  * a   First number to multiply in Montgomery form.
;  * b   Second number to multiply in Montgomery form.
;  * m   Modulus (prime).
;  * mp  Montgomery multiplier.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_4 PROC
        ; Arguments as used below: rcx = r, rdx = a, r8 = b.
        ; The m argument register (r9) is overwritten before it is ever read
        ; and no stack argument is loaded: the reduction uses built-in
        ; constants only.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; mul writes rdx, so the pointer to a is kept in r10
        mov     r10, rdx
        ; The eight-word product accumulates in
        ; r11 (word 0), r12, r13, r14, r15, rdi, rsi, rbx (word 7)
        ; A[0] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r10]
        mov     r11, rax
        mov     r12, rdx
        ; A[0] * B[1]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r10]
        xor     r13, r13
        add     r12, rax
        adc     r13, rdx
        ; A[1] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r10+8]
        xor     r14, r14
        add     r12, rax
        adc     r13, rdx
        adc     r14, 0
        ; A[0] * B[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r10]
        add     r13, rax
        adc     r14, rdx
        ; A[1] * B[1]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r10+8]
        xor     r15, r15
        add     r13, rax
        adc     r14, rdx
        adc     r15, 0
        ; A[2] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r10+16]
        add     r13, rax
        adc     r14, rdx
        adc     r15, 0
        ; A[0] * B[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r10]
        xor     rdi, rdi
        add     r14, rax
        adc     r15, rdx
        adc     rdi, 0
        ; A[1] * B[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r10+8]
        add     r14, rax
        adc     r15, rdx
        adc     rdi, 0
        ; A[2] * B[1]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r10+16]
        add     r14, rax
        adc     r15, rdx
        adc     rdi, 0
        ; A[3] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r10+24]
        add     r14, rax
        adc     r15, rdx
        adc     rdi, 0
        ; A[1] * B[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r10+8]
        xor     rsi, rsi
        add     r15, rax
        adc     rdi, rdx
        adc     rsi, 0
        ; A[2] * B[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r10+16]
        add     r15, rax
        adc     rdi, rdx
        adc     rsi, 0
        ; A[3] * B[1]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r10+24]
        add     r15, rax
        adc     rdi, rdx
        adc     rsi, 0
        ; A[2] * B[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r10+16]
        xor     rbx, rbx
        add     rdi, rax
        adc     rsi, rdx
        adc     rbx, 0
        ; A[3] * B[2]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r10+24]
        add     rdi, rax
        adc     rsi, rdx
        adc     rbx, 0
        ; A[3] * B[3]
        mov     rax, QWORD PTR [r8+24]
        mul     QWORD PTR [r10+24]
        add     rsi, rax
        adc     rbx, rdx
        ; Start Reduction
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ;    - a[0] << 32 << 192
        ;   a[0]-a[3] + (a[0] * 2) << 192
        ; mu is built in rax (word 0), r10, r8, rdx (word 3)
        mov     rax, r11
        lea     rdx, QWORD PTR [r14+2*r11]
        mov     r10, r12
        mov     r8, r13
        mov     r9, r13
        ;   a[0]-a[2] << 32
        shl     r11, 32
        shld    r9, r10, 32
        shld    r12, rax, 32
        ;   - a[0] << 32 << 192
        sub     rdx, r11
        ;   + a[0]-a[2] << 32 << 64
        add     r10, r11
        adc     r8, r12
        adc     rdx, r9
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        xor     r9, r9
        ;   a += mu << 256
        add     r15, rax
        adc     rdi, r10
        adc     rsi, r8
        adc     rbx, rdx
        ; r11 = 0 - CF; later carries and borrows out of the top word are
        ; folded into it and it ends up as the mask for the final subtract
        sbb     r11, r11
        ;   a += mu << 192
        add     r14, rax
        adc     r15, r10
        ; mov does not change flags; r12 keeps mu word 1 for the << 96 step
        mov     r12, r10
        adc     rdi, r8
        adc     rsi, rdx
        adc     rbx, 0
        sbb     r11, 0
        ; mu <<= 32
        ; r9 receives the bits shifted out of the top word of mu
        shld    r9, rdx, 32
        shld    rdx, r8, 32
        shld    r8, r10, 32
        shld    r10, rax, 32
        shl     rax, 32
        ;   a -= (mu << 32) << 192
        sub     r14, rax
        sbb     r15, r10
        sbb     rdi, r8
        sbb     rsi, rdx
        sbb     rbx, r9
        adc     r11, 0
        ;   a += (mu << 32) << 64
        sub     r12, rax
        adc     r13, r10
        adc     r14, r8
        adc     r15, rdx
        adc     rdi, r9
        adc     rsi, 0
        adc     rbx, 0
        sbb     r11, 0
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     r10, 18446744069414584321
        ; mask m and sub from result if overflow
        ;  m[0] = -1 & mask = mask
        ;  m[2] =  0 & mask = 0
        ; rax = low 32 bits of the mask, zero-extended (masked m[1])
        mov     eax, r11d
        ; r10 = masked m[3]
        and     r10, r11
        sub     r15, r11
        sbb     rdi, rax
        mov     QWORD PTR [rcx], r15
        sbb     rsi, 0
        mov     QWORD PTR [rcx+8], rdi
        sbb     rbx, r10
        mov     QWORD PTR [rcx+16], rsi
        mov     QWORD PTR [rcx+24], rbx
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_mul_4 ENDP
_text ENDS
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r   Result of squaring.
; * a   Number to square in Montgomery form.
; * m   Modulus (prime).
; * mp  Montgomery multiplier.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_4 PROC
        ; Arguments as used below: rcx = r, rdx = a.
        ; The m (r8) and mp (r9) argument registers are overwritten before
        ; they are read: the reduction uses built-in constants only.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; mul writes rdx, so the pointer to a is kept in r8
        mov     r8, rdx
        ; Cross products accumulate in r11 (word 1) .. rdi (word 6)
        ; A[0] * A[1]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r8+8]
        mov     r11, rax
        mov     r12, rdx
        ; A[0] * A[2]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r8+16]
        xor     r13, r13
        add     r12, rax
        adc     r13, rdx
        ; A[0] * A[3]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r8+24]
        xor     r14, r14
        add     r13, rax
        adc     r14, rdx
        ; A[1] * A[2]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r8+16]
        xor     r15, r15
        add     r13, rax
        adc     r14, rdx
        adc     r15, 0
        ; A[1] * A[3]
        mov     rax, QWORD PTR [r8+8]
        mul     QWORD PTR [r8+24]
        add     r14, rax
        adc     r15, rdx
        ; A[2] * A[3]
        mov     rax, QWORD PTR [r8+16]
        mul     QWORD PTR [r8+24]
        xor     rdi, rdi
        add     r15, rax
        adc     rdi, rdx
        ; Double
        ; rsi (word 7) receives the bit shifted out of word 6
        xor     rsi, rsi
        add     r11, r11
        adc     r12, r12
        adc     r13, r13
        adc     r14, r14
        adc     r15, r15
        adc     rdi, rdi
        adc     rsi, 0
        ; The self-moves below (mov rax, rax / mov rdx, rdx) copy a register
        ; onto itself: they change neither register contents nor flags.
        ; A[0] * A[0]
        mov     rax, QWORD PTR [r8]
        mul     rax
        mov     rax, rax
        mov     rdx, rdx
        mov     r10, rax
        mov     rbx, rdx
        ; A[1] * A[1]
        mov     rax, QWORD PTR [r8+8]
        mul     rax
        mov     rax, rax
        mov     rdx, rdx
        add     r11, rbx
        adc     r12, rax
        adc     rdx, 0
        mov     rbx, rdx
        ; A[2] * A[2]
        mov     rax, QWORD PTR [r8+16]
        mul     rax
        mov     rax, rax
        mov     rdx, rdx
        add     r13, rbx
        adc     r14, rax
        adc     rdx, 0
        mov     rbx, rdx
        ; A[3] * A[3]
        mov     rax, QWORD PTR [r8+24]
        mul     rax
        mov     rax, rax
        mov     rdx, rdx
        add     r15, rbx
        adc     rdi, rax
        adc     rsi, rdx
        ; The eight-word square is now in
        ; r10 (word 0), r11, r12, r13, r14, r15, rdi, rsi (word 7)
        ; Start Reduction
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ;    - a[0] << 32 << 192
        ;   a[0]-a[3] + (a[0] * 2) << 192
        ; mu is built in rax (word 0), r8, rbx, rdx (word 3)
        mov     rax, r10
        lea     rdx, QWORD PTR [r13+2*r10]
        mov     r8, r11
        mov     rbx, r12
        mov     r9, r12
        ;   a[0]-a[2] << 32
        shl     r10, 32
        shld    r9, r8, 32
        shld    r11, rax, 32
        ;   - a[0] << 32 << 192
        sub     rdx, r10
        ;   + a[0]-a[2] << 32 << 64
        add     r8, r10
        adc     rbx, r11
        adc     rdx, r9
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        xor     r9, r9
        ;   a += mu << 256
        add     r14, rax
        adc     r15, r8
        adc     rdi, rbx
        adc     rsi, rdx
        ; r10 = 0 - CF; later carries and borrows out of the top word are
        ; folded into it and it ends up as the mask for the final subtract
        sbb     r10, r10
        ;   a += mu << 192
        add     r13, rax
        adc     r14, r8
        ; mov does not change flags; r11 keeps mu word 1 for the << 96 step
        mov     r11, r8
        adc     r15, rbx
        adc     rdi, rdx
        adc     rsi, 0
        sbb     r10, 0
        ; mu <<= 32
        shld    r9, rdx, 32
        shld    rdx, rbx, 32
        shld    rbx, r8, 32
        shld    r8, rax, 32
        shl     rax, 32
        ;   a -= (mu << 32) << 192
        sub     r13, rax
        sbb     r14, r8
        sbb     r15, rbx
        sbb     rdi, rdx
        sbb     rsi, r9
        adc     r10, 0
        ;   a += (mu << 32) << 64
        sub     r11, rax
        adc     r12, r8
        adc     r13, rbx
        adc     r14, rdx
        adc     r15, r9
        adc     rdi, 0
        adc     rsi, 0
        sbb     r10, 0
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     r8, 18446744069414584321
        ; mask m and sub from result if overflow
        ;  m[0] = -1 & mask = mask
        ;  m[2] =  0 & mask = 0
        ; rax = low 32 bits of the mask, zero-extended (masked m[1])
        mov     eax, r10d
        and     r8, r10
        sub     r14, r10
        sbb     r15, rax
        mov     QWORD PTR [rcx], r14
        sbb     rdi, 0
        mov     QWORD PTR [rcx+8], r15
        sbb     rsi, r8
        mov     QWORD PTR [rcx+16], rdi
        mov     QWORD PTR [rcx+24], rsi
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_sqr_4 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * a  A single precision integer.
; * b  A single precision integer.
; * return  -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
sp_256_cmp_4 PROC
        ; rcx = a, rdx = b.
        ; r8  = mask: all ones until the first differing word is found,
        ;       zero afterwards so that lower words compare as equal.
        ; rax = running result; r9 = 0 and r10 = 1 are cmov sources.
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
        ; Word 3 (most significant)
        mov     r11, QWORD PTR [rcx+24]
        mov     r12, QWORD PTR [rdx+24]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        ; a > b: result = 1; a < b: result = mask; not equal: clear the mask
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; Word 2
        mov     r11, QWORD PTR [rcx+16]
        mov     r12, QWORD PTR [rdx+16]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; Word 1
        mov     r11, QWORD PTR [rcx+8]
        mov     r12, QWORD PTR [rdx+8]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; Word 0 (least significant)
        mov     r11, QWORD PTR [rcx]
        mov     r12, QWORD PTR [rdx]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ; If every word matched, rax and r8 are both still -1 and this gives 0
        xor     rax, r8
        pop     r12
        ret
sp_256_cmp_4 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not.
; *
; * r  A single precision number representing condition subtract result.
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_4 PROC
        ; rcx = r, rdx = a, r8 = b, r9 = m.
        ; rax returns 0 - CF of the final subtract: -1 on borrow, else 0.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; b & m: either b itself or zero
        mov     r14, QWORD PTR [r8]
        mov     r15, QWORD PTR [r8+8]
        mov     rdi, QWORD PTR [r8+16]
        mov     rsi, QWORD PTR [r8+24]
        and     r14, r9
        and     r15, r9
        and     rdi, r9
        and     rsi, r9
        mov     r10, QWORD PTR [rdx]
        mov     r11, QWORD PTR [rdx+8]
        mov     r12, QWORD PTR [rdx+16]
        mov     r13, QWORD PTR [rdx+24]
        sub     r10, r14
        sbb     r11, r15
        sbb     r12, rdi
        sbb     r13, rsi
        mov     QWORD PTR [rcx], r10
        mov     QWORD PTR [rcx+8], r11
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], r13
        sbb     rax, rax
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_cond_sub_4 ENDP
_text ENDS
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_4 PROC
        ; rcx = a (eight words in, four words out).
        ; The m (rdx) and mp (r8) argument registers are overwritten before
        ; they are read: the reduction uses built-in constants only.
        push    rbx
        push    rsi
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; rcx is reused as scratch below, so the pointer moves to r8
        mov     r8, rcx
        ; a[0..7] in r9, r10, r11, r12, r13, r14, r15, rdi
        mov     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [r8+16]
        mov     r12, QWORD PTR [r8+24]
        mov     r13, QWORD PTR [r8+32]
        mov     r14, QWORD PTR [r8+40]
        mov     r15, QWORD PTR [r8+48]
        mov     rdi, QWORD PTR [r8+56]
        ; Start Reduction
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ;    - a[0] << 32 << 192
        ;   a[0]-a[3] + (a[0] * 2) << 192
        ; mu is built in rax (word 0), rbx, rcx, rdx (word 3)
        mov     rax, r9
        lea     rdx, QWORD PTR [r12+2*r9]
        mov     rbx, r10
        mov     rcx, r11
        mov     rsi, r11
        ;   a[0]-a[2] << 32
        shl     r9, 32
        shld    rsi, rbx, 32
        shld    r10, rax, 32
        ;   - a[0] << 32 << 192
        sub     rdx, r9
        ;   + a[0]-a[2] << 32 << 64
        add     rbx, r9
        adc     rcx, r10
        adc     rdx, rsi
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        xor     rsi, rsi
        ;   a += mu << 256
        add     r13, rax
        adc     r14, rbx
        adc     r15, rcx
        adc     rdi, rdx
        ; r9 = 0 - CF; later carries and borrows out of the top word are
        ; folded into it and it ends up as the mask for the final subtract
        sbb     r9, r9
        ;   a += mu << 192
        add     r12, rax
        adc     r13, rbx
        ; mov does not change flags; r10 keeps mu word 1 for the << 96 step
        mov     r10, rbx
        adc     r14, rcx
        adc     r15, rdx
        adc     rdi, 0
        sbb     r9, 0
        ; mu <<= 32
        shld    rsi, rdx, 32
        shld    rdx, rcx, 32
        shld    rcx, rbx, 32
        shld    rbx, rax, 32
        shl     rax, 32
        ;   a -= (mu << 32) << 192
        sub     r12, rax
        sbb     r13, rbx
        sbb     r14, rcx
        sbb     r15, rdx
        sbb     rdi, rsi
        adc     r9, 0
        ;   a += (mu << 32) << 64
        sub     r10, rax
        adc     r11, rbx
        adc     r12, rcx
        adc     r13, rdx
        adc     r14, rsi
        adc     r15, 0
        adc     rdi, 0
        sbb     r9, 0
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     rbx, 18446744069414584321
        ; mask m and sub from result if overflow
        ;  m[0] = -1 & mask = mask
        ;  m[2] =  0 & mask = 0
        ; rax = low 32 bits of the mask, zero-extended (masked m[1])
        mov     eax, r9d
        and     rbx, r9
        sub     r13, r9
        sbb     r14, rax
        mov     QWORD PTR [r8], r13
        sbb     r15, 0
        mov     QWORD PTR [r8+8], r14
        sbb     rdi, rbx
        mov     QWORD PTR [r8+16], r15
        mov     QWORD PTR [r8+24], rdi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rsi
        pop     rbx
        ret
sp_256_mont_reduce_4 ENDP
_text ENDS
; /* Reduce the number back to 256 bits using Montgomery reduction.
; * This variant reads the modulus words from m and the multiplier from mp.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_order_4 PROC
        ; rcx = a (eight words in, four words out), rdx = m, r8 = mp.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; mul writes rdx, so the pointer to m is kept in r9
        mov     r9, rdx
        ; i = 0
        ; rdi = carry carried over between iterations; r10 = rounds left;
        ; r15 = address of a[i]
        xor     rdi, rdi
        mov     r10, 4
        mov     r15, rcx
L_mont_loop_4:
        ; mu = a[i] * mp
        mov     r14, QWORD PTR [r15]
        imul    r14, r8
        ; a[i+0] += m[0] * mu
        mov     rax, QWORD PTR [r9]
        mov     r12, QWORD PTR [r9+8]
        mul     r14
        mov     rsi, QWORD PTR [r15]
        add     rsi, rax
        mov     r11, rdx
        mov     QWORD PTR [r15], rsi
        adc     r11, 0
        ; a[i+1] += m[1] * mu
        mov     rax, r12
        mul     r14
        mov     r12, QWORD PTR [r9+16]
        mov     rsi, QWORD PTR [r15+8]
        add     rax, r11
        mov     r13, rdx
        adc     r13, 0
        add     rsi, rax
        mov     QWORD PTR [r15+8], rsi
        adc     r13, 0
        ; a[i+2] += m[2] * mu
        mov     rax, r12
        mul     r14
        mov     r12, QWORD PTR [r9+24]
        mov     rsi, QWORD PTR [r15+16]
        add     rax, r13
        mov     r11, rdx
        adc     r11, 0
        add     rsi, rax
        mov     QWORD PTR [r15+16], rsi
        adc     r11, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r12
        mul     r14
        mov     rsi, QWORD PTR [r15+24]
        add     rax, r11
        ; Add the carry from the previous iteration to the high word
        adc     rdx, rdi
        ; mov (not xor) so CF from the adc above survives
        mov     rdi, 0
        adc     rdi, 0
        add     rsi, rax
        mov     QWORD PTR [r15+24], rsi
        adc     QWORD PTR [r15+32], rdx
        adc     rdi, 0
        ; i += 1
        add     r15, 8
        dec     r10
        jnz     L_mont_loop_4
        ; The upper four words hold the result; subtract m when the final
        ; carry (rdi) is set, using rax = 0 - rdi as the mask
        xor     rax, rax
        mov     rdx, QWORD PTR [rcx+32]
        mov     r10, QWORD PTR [rcx+40]
        mov     rsi, QWORD PTR [rcx+48]
        mov     r11, QWORD PTR [rcx+56]
        sub     rax, rdi
        mov     r12, QWORD PTR [r9]
        mov     r13, QWORD PTR [r9+8]
        mov     r14, QWORD PTR [r9+16]
        mov     r15, QWORD PTR [r9+24]
        and     r12, rax
        and     r13, rax
        and     r14, rax
        and     r15, rax
        sub     rdx, r12
        sbb     r10, r13
        sbb     rsi, r14
        sbb     r11, r15
        mov     QWORD PTR [rcx], rdx
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx+16], rsi
        mov     QWORD PTR [rcx+24], r11
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_reduce_order_4 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * r  Result of addition.
; * a  First number to add in Montgomery form.
; * b  Second number to add in Montgomery form.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_add_4 PROC
        ; rcx = r, rdx = a, r8 = b.  The m argument register (r9) is
        ; overwritten before it is read; the modulus words are built in.
        push    r12
        push    r13
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        add     rax, QWORD PTR [r8]
        adc     r9, QWORD PTR [r8+8]
        ; 18446744069414584321 = 2^64 - 2^32 + 1 (mov leaves CF intact)
        mov     r13, 18446744069414584321
        adc     r10, QWORD PTR [r8+16]
        adc     r11, QWORD PTR [r8+24]
        ; rdx = mask: -1 when the add carried out, else 0
        sbb     rdx, rdx
        ; r12 = low 32 bits of the mask, zero-extended; r13 = masked top word
        mov     r12d, edx
        and     r13, rdx
        ; First conditional subtract of the modulus
        sub     rax, rdx
        sbb     r9, r12
        sbb     r10, 0
        sbb     r11, r13
        ; Mask plus the borrow just produced: selects a second subtract only
        ; when the mask was set and the first subtract did not borrow
        adc     rdx, 0
        and     r12, rdx
        and     r13, rdx
        sub     rax, rdx
        sbb     r9, r12
        mov     QWORD PTR [rcx], rax
        sbb     r10, 0
        mov     QWORD PTR [rcx+8], r9
        sbb     r11, r13
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        pop     r13
        pop     r12
        ret
sp_256_mont_add_4 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r  Result of doubling.
; * a  Number to double in Montgomery form.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_dbl_4 PROC
        ; rcx = r, rdx = a.  The m argument register (r8) is overwritten
        ; before it is read; the modulus words are built in.
        push    r12
        push    r13
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        add     rax, rax
        adc     r8, r8
        ; 18446744069414584321 = 2^64 - 2^32 + 1 (mov leaves CF intact)
        mov     r12, 18446744069414584321
        adc     r9, r9
        ; Keep the undoubled top word: its sign bit is the bit shifted out
        mov     r13, r10
        adc     r10, r10
        ; r13 = mask: -1 when the top bit of a was set, else 0
        sar     r13, 63
        ; r11 = low 32 bits of the mask, zero-extended; r12 = masked top word
        mov     r11d, r13d
        and     r12, r13
        ; First conditional subtract of the modulus
        sub     rax, r13
        sbb     r8, r11
        sbb     r9, 0
        sbb     r10, r12
        ; Mask plus the borrow just produced: selects a second subtract only
        ; when the mask was set and the first subtract did not borrow
        adc     r13, 0
        and     r11, r13
        and     r12, r13
        sub     rax, r13
        sbb     r8, r11
        mov     QWORD PTR [rcx], rax
        sbb     r9, 0
        mov     QWORD PTR [rcx+8], r8
        sbb     r10, r12
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        pop     r13
        pop     r12
        ret
sp_256_mont_dbl_4 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r  Result of Tripling.
; * a  Number to triple in Montgomery form.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_tpl_4 PROC
        ; rcx = r, rdx = a.  The m argument register (r8) is overwritten
        ; before it is read; the modulus words are built in.
        ; Computed as (a + a, reduced) + a, reduced again.
        push    r12
        push    r13
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        add     rax, rax
        adc     r8, r8
        ; 18446744069414584321 = 2^64 - 2^32 + 1 (mov leaves CF intact)
        mov     r12, 18446744069414584321
        adc     r9, r9
        adc     r10, r10
        ; r13 = mask: -1 when the doubling carried out, else 0
        sbb     r13, r13
        mov     r11d, r13d
        and     r12, r13
        sub     rax, r13
        sbb     r8, r11
        sbb     r9, 0
        sbb     r10, r12
        ; Mask plus borrow selects an optional second subtract
        adc     r13, 0
        and     r11, r13
        and     r12, r13
        sub     rax, r13
        sbb     r8, r11
        sbb     r9, 0
        sbb     r10, r12
        ; Add a a third time; a is read from memory again (r is not yet
        ; written at this point)
        add     rax, QWORD PTR [rdx]
        adc     r8, QWORD PTR [rdx+8]
        mov     r12, 18446744069414584321
        adc     r9, QWORD PTR [rdx+16]
        adc     r10, QWORD PTR [rdx+24]
        ; NOTE(review): this is r13 - CF, not 0 - CF, so the new mask is
        ; built on whatever r13 holds after the first reduction - presumably
        ; always 0 for inputs below the modulus; confirm.
        sbb     r13, 0
        mov     r11d, r13d
        and     r12, r13
        sub     rax, r13
        sbb     r8, r11
        sbb     r9, 0
        sbb     r10, r12
        adc     r13, 0
        and     r11, r13
        and     r12, r13
        sub     rax, r13
        sbb     r8, r11
        mov     QWORD PTR [rcx], rax
        sbb     r9, 0
        mov     QWORD PTR [rcx+8], r8
        sbb     r10, r12
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        pop     r13
        pop     r12
        ret
sp_256_mont_tpl_4 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * r  Result of subtraction.
; * a  Number to subtract from in Montgomery form.
; * b  Number to subtract with in Montgomery form.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_sub_4 PROC
        ; rcx = r, rdx = a, r8 = b.  The modulus words are built in; the m
        ; argument is not read.
        push    r12
        ; r12 = 2^64 - 2^32 + 1, the top modulus word used by the fix-up
        mov     r12, 18446744069414584321
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        sub     rax, QWORD PTR [r8]
        sbb     r9, QWORD PTR [r8+8]
        sbb     r10, QWORD PTR [r8+16]
        sbb     r11, QWORD PTR [r8+24]
        ; rdx = mask: -1 when the subtract borrowed, else 0
        sbb     rdx, rdx
        ; b is fully consumed, so r8 can hold the masked second modulus word
        ; (low 32 bits of the mask, zero-extended)
        mov     r8d, edx
        and     r12, rdx
        ; First conditional add of the modulus
        add     rax, rdx
        adc     r9, r8
        adc     r10, 0
        adc     r11, r12
        ; Mask plus the carry just produced: selects a second add only when
        ; the mask was set and the first add did not carry
        adc     rdx, 0
        and     r8, rdx
        and     r12, rdx
        add     rax, rdx
        adc     r9, r8
        adc     r10, 0
        adc     r11, r12
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        pop     r12
        ret
sp_256_mont_sub_4 ENDP
_text ENDS
; /* Halve a number modulo the prime. (r = a / 2 % m)
; * When a is odd the modulus is added first so that the shift is exact.
; *
; * r  Result of division by 2.
; * a  Number to divide.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_div2_4 PROC
        ; rcx = r, rdx = a.  The modulus words are built in; the m argument
        ; is not read.
        push    r12
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        ; r12 = 2^64 - 2^32 + 1, the top modulus word
        mov     r12, 18446744069414584321
        ; a is fully loaded, so rdx can become the mask: -(a[0] & 1)
        mov     edx, eax
        and     edx, 1
        neg     rdx
        ; r11 = low 32 bits of the mask, zero-extended; r12 = masked top word
        mov     r11d, edx
        and     r12, rdx
        add     rax, rdx
        adc     r8, r11
        adc     r9, 0
        adc     r10, r12
        ; mov leaves CF alone; rdx = carry out of the add (the 257th bit)
        mov     edx, 0
        adc     rdx, 0
        ; Shift the 257-bit value right by one
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, rdx, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        pop     r12
        ret
sp_256_mont_div2_4 ENDP
_text ENDS
; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
; *
; * r  Result of subtraction.
; * a  Number to subtract from in Montgomery form.
; * b  Number to double and subtract with in Montgomery form.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_rsb_sub_dbl_4 PROC
        ; rcx = r, rdx = a, r8 = b.  The modulus words are built in; the m
        ; argument register (r9) is overwritten before it is read.
        ; Besides r = a - 2*b, this routine also overwrites b with b - r
        ; (third stage below).
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [r8]
        mov     r13, QWORD PTR [r8+8]
        mov     r14, QWORD PTR [r8+16]
        mov     r15, QWORD PTR [r8+24]
        ; Stage 1: r12..r15 = 2*b, reduced
        add     r12, r12
        adc     r13, r13
        ; 18446744069414584321 = 2^64 - 2^32 + 1 (mov leaves CF intact)
        mov     rsi, 18446744069414584321
        adc     r14, r14
        adc     r15, r15
        ; rdx = mask: -1 when the doubling carried out, else 0
        sbb     rdx, rdx
        ; rdi = low 32 bits of the mask, zero-extended; rsi = masked top word
        mov     edi, edx
        and     rsi, rdx
        sub     r12, rdx
        sbb     r13, rdi
        sbb     r14, 0
        sbb     r15, rsi
        ; Mask plus borrow selects an optional second subtract
        adc     rdx, 0
        and     rdi, rdx
        and     rsi, rdx
        sub     r12, rdx
        sbb     r13, rdi
        sbb     r14, 0
        sbb     r15, rsi
        ; Stage 2: rax, r9, r10, r11 = a - 2*b, reduced
        sub     rax, r12
        sbb     r9, r13
        mov     rsi, 18446744069414584321
        sbb     r10, r14
        sbb     r11, r15
        ; NOTE(review): this is rdx - CF, not 0 - CF, so the mask is built
        ; on whatever rdx holds after stage 1 - presumably always 0 for
        ; inputs below the modulus; confirm.
        sbb     rdx, 0
        mov     edi, edx
        and     rsi, rdx
        add     rax, rdx
        adc     r9, rdi
        adc     r10, 0
        adc     r11, rsi
        adc     rdx, 0
        and     rdi, rdx
        and     rsi, rdx
        add     rax, rdx
        adc     r9, rdi
        mov     QWORD PTR [rcx], rax
        adc     r10, 0
        mov     QWORD PTR [rcx+8], r9
        adc     r11, rsi
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ; Stage 3: b = b - r, reduced.  b is reloaded from memory here,
        ; i.e. after r has been stored.
        mov     r12, QWORD PTR [r8]
        mov     r13, QWORD PTR [r8+8]
        mov     r14, QWORD PTR [r8+16]
        mov     r15, QWORD PTR [r8+24]
        sub     r12, rax
        sbb     r13, r9
        mov     rsi, 18446744069414584321
        sbb     r14, r10
        sbb     r15, r11
        sbb     rdx, rdx
        mov     edi, edx
        and     rsi, rdx
        add     r12, rdx
        adc     r13, rdi
        adc     r14, 0
        adc     r15, rsi
        adc     rdx, 0
        and     rdi, rdx
        and     rsi, rdx
        add     r12, rdx
        adc     r13, rdi
        mov     QWORD PTR [r8], r12
        adc     r14, 0
        mov     QWORD PTR [r8+8], r13
        adc     r15, rsi
        mov     QWORD PTR [r8+16], r14
        mov     QWORD PTR [r8+24], r15
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_rsb_sub_dbl_4 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_4 PROC
        ; rcx = r, rdx = table, r8d = idx.
        ; xmm6-xmm15 are spilled to the stack and restored before returning.
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; xmm15 = 1 in every dword lane; xmm14 = number of the entry about
        ; to be examined (starts at 1)
        mov     eax, 1
        movd    xmm15, eax
        pshufd  xmm15, xmm15, 0
        movdqa  xmm14, xmm15
        ; xmm13 = idx in every dword lane
        movd    xmm13, r8d
        pshufd  xmm13, xmm13, 0
        ; xmm0-xmm5 accumulate the selected entry; they stay zero when no
        ; entry number matches idx
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        ; Skip the first 200-byte entry, then visit the next 32
        add     rdx, 200
        mov     eax, 32
L_256_get_point_33_4_start_1:
        ; xmm12 = all ones when this entry's number equals idx, else zero
        movdqa  xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd   xmm14, xmm15
        ; 32 bytes at offset 0
        movdqu  xmm6, OWORD PTR [rdx]
        movdqu  xmm7, OWORD PTR [rdx+16]
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        ; 32 bytes at offset 64
        movdqu  xmm8, OWORD PTR [rdx+64]
        movdqu  xmm9, OWORD PTR [rdx+80]
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        por     xmm2, xmm8
        por     xmm3, xmm9
        ; 32 bytes at offset 128
        movdqu  xmm10, OWORD PTR [rdx+128]
        movdqu  xmm11, OWORD PTR [rdx+144]
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        por     xmm4, xmm10
        por     xmm5, xmm11
        add     rdx, 200
        dec     rax
        jnz     L_256_get_point_33_4_start_1
        ; Write the selected data to the same offsets in r
        movdqu  OWORD PTR [rcx], xmm0
        movdqu  OWORD PTR [rcx+16], xmm1
        movdqu  OWORD PTR [rcx+64], xmm2
        movdqu  OWORD PTR [rcx+80], xmm3
        movdqu  OWORD PTR [rcx+128], xmm4
        movdqu  OWORD PTR [rcx+144], xmm5
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_256_get_point_33_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_avx2_4 PROC
        ; rcx = r, rdx = table, r8d = idx.
        ; The low 128 bits of xmm6-xmm9 are spilled and restored.
        sub     rsp, 64
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        mov     rax, 1
        movd    xmm7, r8d
        ; Skip the first 200-byte entry
        add     rdx, 200
        movd    xmm9, eax
        ; 32 entries are visited
        mov     rax, 32
        ; With an all-zero index vector, vpermd copies dword 0 of the source
        ; into every lane: ymm7 = idx broadcast, ymm9 = 1 broadcast
        vpxor   ymm8, ymm8, ymm8
        vpermd  ymm7, ymm8, ymm7
        vpermd  ymm9, ymm8, ymm9
        ; ymm0-ymm2 accumulate the selected entry
        vpxor   ymm0, ymm0, ymm0
        vpxor   ymm1, ymm1, ymm1
        vpxor   ymm2, ymm2, ymm2
        ; ymm8 = number of the entry about to be examined (starts at 1)
        vmovdqa ymm8, ymm9
L_256_get_point_33_avx2_4_start:
        ; ymm6 = all ones when this entry's number equals idx, else zero
        vpcmpeqd ymm6, ymm8, ymm7
        vpaddd  ymm8, ymm8, ymm9
        ; 32 bytes each at offsets 0, 64 and 128 of the entry
        vmovupd ymm3, YMMWORD PTR [rdx]
        vmovupd ymm4, YMMWORD PTR [rdx+64]
        vmovupd ymm5, YMMWORD PTR [rdx+128]
        add     rdx, 200
        vpand   ymm3, ymm3, ymm6
        vpand   ymm4, ymm4, ymm6
        vpand   ymm5, ymm5, ymm6
        vpor    ymm0, ymm0, ymm3
        vpor    ymm1, ymm1, ymm4
        vpor    ymm2, ymm2, ymm5
        dec     rax
        jnz     L_256_get_point_33_avx2_4_start
        ; Write the selected data to the same offsets in r
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        vmovupd YMMWORD PTR [rcx+128], ymm2
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        add     rsp, 64
        ret
sp_256_get_point_33_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r   Result of multiplication.
; * a   First number to multiply in Montgomery form.
; * b   Second number to multiply in Montgomery form.
; * m   Modulus (prime).
; * mp  Montgomery multiplier.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_avx2_4 PROC
        ; Arguments as used below: rcx = r, rdx = a, r8 = b.
        ; The m argument register (r9) is overwritten before it is read and
        ; no stack argument is loaded: the reduction uses built-in constants.
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; mulx takes one factor from rdx and r8 becomes a result register,
        ; so b moves to rbp and a moves to rax
        mov     rbp, r8
        mov     rax, rdx
        mov     rdx, QWORD PTR [rax]
        ; B[1] is kept in r14 for the first three rows
        mov     r14, QWORD PTR [rbp+8]
        ; The eight-word product accumulates in r8 (word 0) .. r15 (word 7)
        ; A[0] * B[0]
        mulx    r9, r8, QWORD PTR [rbp]
        ; Zero rbx and clear CF/OF; mulx itself leaves the flags alone
        xor     rbx, rbx
        ; A[0] * B[1]
        mulx    r10, rdi, r14
        adcx    r9, rdi
        ; A[0] * B[2]
        mulx    r11, rdi, QWORD PTR [rbp+16]
        adcx    r10, rdi
        ; A[0] * B[3]
        mulx    r12, rdi, QWORD PTR [rbp+24]
        adcx    r11, rdi
        mov     rdx, QWORD PTR [rax+8]
        adcx    r12, rbx
        ; A[1] * B[0]
        mulx    rsi, rdi, QWORD PTR [rbp]
        xor     rbx, rbx
        adcx    r9, rdi
        ; A[1] * B[1]
        mulx    r15, rdi, r14
        adox    r10, rsi
        adcx    r10, rdi
        ; A[1] * B[2]
        mulx    rsi, rdi, QWORD PTR [rbp+16]
        adox    r11, r15
        adcx    r11, rdi
        ; A[1] * B[3]
        mulx    r13, rdi, QWORD PTR [rbp+24]
        adox    r12, rsi
        adcx    r12, rdi
        adox    r13, rbx
        mov     rdx, QWORD PTR [rax+16]
        adcx    r13, rbx
        ; A[2] * B[0]
        mulx    rsi, rdi, QWORD PTR [rbp]
        xor     rbx, rbx
        adcx    r10, rdi
        ; A[2] * B[1]
        mulx    r15, rdi, r14
        adox    r11, rsi
        adcx    r11, rdi
        ; A[2] * B[2]
        mulx    rsi, rdi, QWORD PTR [rbp+16]
        adox    r12, r15
        adcx    r12, rdi
        ; A[2] * B[3]
        ; r14 becomes result word 6 here; B[1] is read from memory afterwards
        mulx    r14, rdi, QWORD PTR [rbp+24]
        adox    r13, rsi
        adcx    r13, rdi
        adox    r14, rbx
        mov     rdx, QWORD PTR [rax+24]
        adcx    r14, rbx
        ; A[3] * B[0]
        mulx    rsi, rdi, QWORD PTR [rbp]
        xor     rbx, rbx
        adcx    r11, rdi
        ; A[3] * B[1]
        mulx    r15, rdi, QWORD PTR [rbp+8]
        adox    r12, rsi
        adcx    r12, rdi
        ; A[3] * B[2]
        mulx    rsi, rdi, QWORD PTR [rbp+16]
        adox    r13, r15
        adcx    r13, rdi
        ; A[3] * B[3]
        mulx    r15, rdi, QWORD PTR [rbp+24]
        adox    r14, rsi
        adcx    r14, rdi
        adox    r15, rbx
        adcx    r15, rbx
        ; Start Reduction
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ;    - a[0] << 32 << 192
        ;   a[0]-a[3] + (a[0] * 2) << 192
        ; mu is built in rdi (word 0), rax, rbp, rdx (word 3)
        mov     rdi, r8
        lea     rdx, QWORD PTR [r11+2*r8]
        mov     rax, r9
        mov     rbp, r10
        mov     rsi, r10
        ;   a[0]-a[2] << 32
        shl     r8, 32
        shld    rsi, rax, 32
        shld    r9, rdi, 32
        ;   - a[0] << 32 << 192
        sub     rdx, r8
        ;   + a[0]-a[2] << 32 << 64
        add     rax, r8
        adc     rbp, r9
        adc     rdx, rsi
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        xor     rsi, rsi
        ;   a += mu << 256
        add     r12, rdi
        adc     r13, rax
        adc     r14, rbp
        adc     r15, rdx
        ; r8 = 0 - CF; later carries and borrows out of the top word are
        ; folded into it and it ends up as the mask for the final subtract
        sbb     r8, r8
        ;   a += mu << 192
        add     r11, rdi
        adc     r12, rax
        ; mov does not change flags; r9 keeps mu word 1 for the << 96 step
        mov     r9, rax
        adc     r13, rbp
        adc     r14, rdx
        adc     r15, 0
        sbb     r8, 0
        ; mu <<= 32
        shld    rsi, rdx, 32
        shld    rdx, rbp, 32
        shld    rbp, rax, 32
        shld    rax, rdi, 32
        shl     rdi, 32
        ;   a -= (mu << 32) << 192
        sub     r11, rdi
        sbb     r12, rax
        sbb     r13, rbp
        sbb     r14, rdx
        sbb     r15, rsi
        adc     r8, 0
        ;   a += (mu << 32) << 64
        sub     r9, rdi
        adc     r10, rax
        adc     r11, rbp
        adc     r12, rdx
        adc     r13, rsi
        adc     r14, 0
        adc     r15, 0
        sbb     r8, 0
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     rax, 18446744069414584321
        ; mask m and sub from result if overflow
        ;  m[0] = -1 & mask = mask
        ;  m[2] =  0 & mask = 0
        ; rdi = low 32 bits of the mask, zero-extended (masked m[1])
        mov     edi, r8d
        and     rax, r8
        sub     r12, r8
        sbb     r13, rdi
        mov     QWORD PTR [rcx], r12
        sbb     r14, 0
        mov     QWORD PTR [rcx+8], r13
        sbb     r15, rax
        mov     QWORD PTR [rcx+16], r14
        mov     QWORD PTR [rcx+24], r15
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
sp_256_mont_mul_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r   Result of squaring.
; * a   Number to square in Montgomery form.
; * m   Modulus (prime).
; * mp  Montgomery multiplier.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_avx2_4 PROC
        ; Arguments as used below: rcx = r, rdx = a.
        ; The m (r8) and mp (r9) argument registers are overwritten before
        ; they are read: the reduction uses built-in constants only.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; mulx takes one factor from rdx, so the pointer to a moves to rax
        mov     rax, rdx
        ; Clears CF and OF for the adcx/adox chains (r8 is overwritten below)
        xor     r8, r8
        mov     rdx, QWORD PTR [rax]
        mov     rsi, QWORD PTR [rax+8]
        mov     rbx, QWORD PTR [rax+16]
        mov     r15, QWORD PTR [rax+24]
        ; Off-diagonal products accumulate in r9..r14 (result words 1..6)
        ; A[0] * A[1]
        mulx    r10, r9, rsi
        ; A[0] * A[2]
        mulx    r11, r8, rbx
        adox    r10, r8
        ; A[0] * A[3]
        mulx    r12, r8, r15
        mov     rdx, rsi
        adox    r11, r8
        ; A[1] * A[2]
        mulx    rdi, r8, rbx
        mov     rdx, r15
        adcx    r11, r8
        ; A[1] * A[3]
        mulx    r13, r8, rsi
        ; mov (not xor) so the pending CF and OF are left untouched
        mov     r15, 0
        adox    r12, rdi
        adcx    r12, r8
        ; A[2] * A[3]
        mulx    r14, r8, rbx
        adox    r13, r15
        adcx    r13, r8
        adox    r14, r15
        adcx    r14, r15
        ; Double with Carry Flag
        ; adcx doubles the cross products while adox adds in the squares
        xor     r15, r15
        ; A[0] * A[0]
        mov     rdx, QWORD PTR [rax]
        mulx    rdi, r8, rdx
        adcx    r9, r9
        adcx    r10, r10
        adox    r9, rdi
        ; A[1] * A[1]
        mov     rdx, QWORD PTR [rax+8]
        mulx    rbx, rsi, rdx
        adcx    r11, r11
        adox    r10, rsi
        ; A[2] * A[2]
        mov     rdx, QWORD PTR [rax+16]
        mulx    rsi, rdi, rdx
        adcx    r12, r12
        adox    r11, rbx
        adcx    r13, r13
        adox    r12, rdi
        adcx    r14, r14
        ; A[3] * A[3]
        mov     rdx, QWORD PTR [rax+24]
        mulx    rbx, rdi, rdx
        adox    r13, rsi
        adcx    r15, r15
        adox    r14, rdi
        adox    r15, rbx
        ; The eight-word square is now in r8 (word 0) .. r15 (word 7)
        ; Start Reduction
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ;    - a[0] << 32 << 192
        ;   a[0]-a[3] + (a[0] * 2) << 192
        ; mu is built in rdi (word 0), rax, rsi, rdx (word 3)
        mov     rdi, r8
        lea     rdx, QWORD PTR [r11+2*r8]
        mov     rax, r9
        mov     rsi, r10
        mov     rbx, r10
        ;   a[0]-a[2] << 32
        shl     r8, 32
        shld    rbx, rax, 32
        shld    r9, rdi, 32
        ;   - a[0] << 32 << 192
        sub     rdx, r8
        ;   + a[0]-a[2] << 32 << 64
        add     rax, r8
        adc     rsi, r9
        adc     rdx, rbx
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        xor     rbx, rbx
        ;   a += mu << 256
        add     r12, rdi
        adc     r13, rax
        adc     r14, rsi
        adc     r15, rdx
        ; r8 = 0 - CF; later carries and borrows out of the top word are
        ; folded into it and it ends up as the mask for the final subtract
        sbb     r8, r8
        ;   a += mu << 192
        add     r11, rdi
        adc     r12, rax
        ; mov does not change flags; r9 keeps mu word 1 for the << 96 step
        mov     r9, rax
        adc     r13, rsi
        adc     r14, rdx
        adc     r15, 0
        sbb     r8, 0
        ; mu <<= 32
        shld    rbx, rdx, 32
        shld    rdx, rsi, 32
        shld    rsi, rax, 32
        shld    rax, rdi, 32
        shl     rdi, 32
        ;   a -= (mu << 32) << 192
        sub     r11, rdi
        sbb     r12, rax
        sbb     r13, rsi
        sbb     r14, rdx
        sbb     r15, rbx
        adc     r8, 0
        ;   a += (mu << 32) << 64
        sub     r9, rdi
        adc     r10, rax
        adc     r11, rsi
        adc     r12, rdx
        adc     r13, rbx
        adc     r14, 0
        adc     r15, 0
        sbb     r8, 0
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     rax, 18446744069414584321
        ; mask m and sub from result if overflow
        ;  m[0] = -1 & mask = mask
        ;  m[2] =  0 & mask = 0
        ; rdi = low 32 bits of the mask, zero-extended (masked m[1])
        mov     edi, r8d
        and     rax, r8
        sub     r12, r8
        sbb     r13, rdi
        mov     QWORD PTR [rcx], r12
        sbb     r14, 0
        mov     QWORD PTR [rcx+8], r13
        sbb     r15, rax
        mov     QWORD PTR [rcx+16], r14
        mov     QWORD PTR [rcx+24], r15
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_sqr_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not.
; *
; * r  A single precision number representing condition subtract result.
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_avx2_4 PROC
        ; rcx = r, rdx = a, r8 = b, r9 = m.
        ; rax returns 0 - CF of the final subtract: -1 on borrow, else 0.
        ; Only general-purpose integer instructions are used in this body.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; b & m: either b itself or zero
        mov     r14, QWORD PTR [r8]
        mov     r15, QWORD PTR [r8+8]
        mov     rdi, QWORD PTR [r8+16]
        mov     rsi, QWORD PTR [r8+24]
        and     r14, r9
        and     r15, r9
        and     rdi, r9
        and     rsi, r9
        mov     r10, QWORD PTR [rdx]
        mov     r11, QWORD PTR [rdx+8]
        mov     r12, QWORD PTR [rdx+16]
        mov     r13, QWORD PTR [rdx+24]
        sub     r10, r14
        sbb     r11, r15
        sbb     r12, rdi
        sbb     r13, rsi
        mov     QWORD PTR [rcx], r10
        mov     QWORD PTR [rcx+8], r11
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], r13
        sbb     rax, rax
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_cond_sub_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_order_avx2_4 PROC
        ; rcx = a (eight words in, four words out), rdx = m, r8 = mp.
        ; The four rounds read the modulus words through m; the final
        ; conditional subtract uses constants embedded in the code instead.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; rcx, rdx and r8 are needed by mulx, so the arguments move to
        ; rax (a), r10 (m) and r11 (mp)
        mov     rax, rcx
        mov     r10, rdx
        mov     r11, r8
        mov     r14, QWORD PTR [rax]
        mov     r15, QWORD PTR [rax+8]
        mov     rdi, QWORD PTR [rax+16]
        mov     rsi, QWORD PTR [rax+24]
        ; r13 = carry between rounds, r12 = constant zero; the xors also
        ; clear CF and OF for the adcx/adox chains
        xor     r13, r13
        xor     r12, r12
        ; a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
        mov     rbx, QWORD PTR [rax+32]
        ;   mu = a[0] * mp
        ; Only the low half (written back to rdx) is used
        mov     rdx, r14
        mulx    rcx, rdx, r11
        ;   a[0] += m[0] * mu
        mulx    r9, r8, QWORD PTR [r10]
        adcx    r14, r8
        ;   a[1] += m[1] * mu
        mulx    rcx, r8, QWORD PTR [r10+8]
        adox    r15, r9
        adcx    r15, r8
        ;   a[2] += m[2] * mu
        mulx    r9, r8, QWORD PTR [r10+16]
        adox    rdi, rcx
        adcx    rdi, r8
        ;   a[3] += m[3] * mu
        mulx    rcx, r8, QWORD PTR [r10+24]
        adox    rsi, r9
        adcx    rsi, r8
        ;   a[4] += carry
        adox    rbx, rcx
        adcx    rbx, r12
        ;   carry
        adox    r13, r12
        adcx    r13, r12
        ; a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
        ; r14 (old a[0]) is reused for a[5]
        mov     r14, QWORD PTR [rax+40]
        ;   mu = a[1] * mp
        mov     rdx, r15
        mulx    rcx, rdx, r11
        ;   a[1] += m[0] * mu
        mulx    r9, r8, QWORD PTR [r10]
        adcx    r15, r8
        ;   a[2] += m[1] * mu
        mulx    rcx, r8, QWORD PTR [r10+8]
        adox    rdi, r9
        adcx    rdi, r8
        ;   a[3] += m[2] * mu
        mulx    r9, r8, QWORD PTR [r10+16]
        adox    rsi, rcx
        adcx    rsi, r8
        ;   a[4] += m[3] * mu
        mulx    rcx, r8, QWORD PTR [r10+24]
        adox    rbx, r9
        adcx    rbx, r8
        ;   a[5] += carry
        adox    r14, rcx
        adcx    r14, r13
        ; mov keeps CF and OF intact while resetting the carry word
        mov     r13, r12
        ;   carry
        adox    r13, r12
        adcx    r13, r12
        ; a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
        ; r15 (old a[1]) is reused for a[6]
        mov     r15, QWORD PTR [rax+48]
        ;   mu = a[2] * mp
        mov     rdx, rdi
        mulx    rcx, rdx, r11
        ;   a[2] += m[0] * mu
        mulx    r9, r8, QWORD PTR [r10]
        adcx    rdi, r8
        ;   a[3] += m[1] * mu
        mulx    rcx, r8, QWORD PTR [r10+8]
        adox    rsi, r9
        adcx    rsi, r8
        ;   a[4] += m[2] * mu
        mulx    r9, r8, QWORD PTR [r10+16]
        adox    rbx, rcx
        adcx    rbx, r8
        ;   a[5] += m[3] * mu
        mulx    rcx, r8, QWORD PTR [r10+24]
        adox    r14, r9
        adcx    r14, r8
        ;   a[6] += carry
        adox    r15, rcx
        adcx    r15, r13
        mov     r13, r12
        ;   carry
        adox    r13, r12
        adcx    r13, r12
        ; a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
        ; rdi (old a[2]) is reused for a[7]
        mov     rdi, QWORD PTR [rax+56]
        ;   mu = a[3] * mp
        mov     rdx, rsi
        mulx    rcx, rdx, r11
        ;   a[3] += m[0] * mu
        mulx    r9, r8, QWORD PTR [r10]
        adcx    rsi, r8
        ;   a[4] += m[1] * mu
        mulx    rcx, r8, QWORD PTR [r10+8]
        adox    rbx, r9
        adcx    rbx, r8
        ;   a[5] += m[2] * mu
        mulx    r9, r8, QWORD PTR [r10+16]
        adox    r14, rcx
        adcx    r14, r8
        ;   a[6] += m[3] * mu
        mulx    rcx, r8, QWORD PTR [r10+24]
        adox    r15, r9
        adcx    r15, r8
        ;   a[7] += carry
        adox    rdi, rcx
        adcx    rdi, r13
        mov     r13, r12
        ;   carry
        adox    r13, r12
        adcx    r13, r12
        ; Subtract mod if carry
        ; r13 = 0 - carry: the mask for the conditional subtract
        neg     r13
        ; Embedded modulus words 0, 1 and 3; word 2 is all ones, so the mask
        ; itself (r13) serves as its masked value.
        ; NOTE(review): these look like the words of the NIST P-256 group
        ; order, i.e. this routine is only valid for that modulus - confirm.
        mov     r8, 17562291160714782033
        mov     r9, 13611842547513532036
        mov     rdx, 18446744069414584320
        and     r8, r13
        and     r9, r13
        and     rdx, r13
        sub     rbx, r8
        sbb     r14, r9
        sbb     r15, r13
        sbb     rdi, rdx
        mov     QWORD PTR [rax], rbx
        mov     QWORD PTR [rax+8], r14
        mov     QWORD PTR [rax+16], r15
        mov     QWORD PTR [rax+24], rdi
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mont_reduce_order_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r  Result of division by 2.
; * a  Number to divide.
; * m  Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_div2_avx2_4 PROC
        ; rcx = r, rdx = a.  The m argument register (r8) is overwritten
        ; before it is read; the modulus words are built in.
        ; Only general-purpose integer instructions are used in this body.
        push    r12
        push    r13
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        ; 18446744069414584321 = 2^64 - 2^32 + 1
        mov     r12, 18446744069414584321
        ; r13 = mask: -1 when a is odd, else 0
        mov     r13, rax
        and     r13, 1
        neg     r13
        ; r11 = low 32 bits of the mask, zero-extended; r12 = masked top word
        mov     r11d, r13d
        and     r12, r13
        ; Add the modulus when a is odd so the sum is even
        add     rax, r13
        adc     r8, r11
        adc     r9, 0
        adc     r10, r12
        ; mov (not xor) so CF survives; r13 = carry out of the add
        mov     r13, 0
        adc     r13, 0
        ; Shift the 257-bit value right by one
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r13, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        pop     r13
        pop     r12
        ret
sp_256_mont_div2_avx2_4 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_64_4 PROC
        ; rcx = r, rdx = table, r8d = idx.
        ; xmm6-xmm11 are spilled to the stack and restored before returning.
        sub     rsp, 96
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        ; xmm11 = 1 in every dword lane; xmm10 = number of the entry about
        ; to be examined (starts at 1)
        mov     eax, 1
        movd    xmm11, eax
        pshufd  xmm11, xmm11, 0
        movdqa  xmm10, xmm11
        ; xmm9 = idx in every dword lane
        movd    xmm9, r8d
        pshufd  xmm9, xmm9, 0
        ; xmm0-xmm3 accumulate the selected entry; they stay zero when no
        ; entry number matches idx
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        ; From entry 1: skip the first 64-byte entry, visit the next 63
        add     rdx, 64
        mov     eax, 63
L_256_get_entry_64_4_start_0:
        ; xmm8 = all ones when this entry's number equals idx, else zero
        movdqa  xmm8, xmm10
        pcmpeqd xmm8, xmm9
        paddd   xmm10, xmm11
        ; First 32 bytes of the entry
        movdqu  xmm4, OWORD PTR [rdx]
        movdqu  xmm5, OWORD PTR [rdx+16]
        pand    xmm4, xmm8
        pand    xmm5, xmm8
        por     xmm0, xmm4
        por     xmm1, xmm5
        ; Second 32 bytes of the entry
        movdqu  xmm6, OWORD PTR [rdx+32]
        movdqu  xmm7, OWORD PTR [rdx+48]
        pand    xmm6, xmm8
        pand    xmm7, xmm8
        por     xmm2, xmm6
        por     xmm3, xmm7
        add     rdx, 64
        dec     rax
        jnz     L_256_get_entry_64_4_start_0
        ; The two halves land at offsets 0 and 64 of r
        movdqu  OWORD PTR [rcx], xmm0
        movdqu  OWORD PTR [rcx+16], xmm1
        movdqu  OWORD PTR [rcx+64], xmm2
        movdqu  OWORD PTR [rcx+80], xmm3
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_256_get_entry_64_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of entry to retrieve.
; */
; Win64 register use, as read from the code below:
;   rcx = r, rdx = table, r8d = idx.
; Entries 1..63 are all loaded and masked, so the addresses touched do not
; depend on idx. Entry 0 is never read: the accumulators start at zero, so
; idx == 0 produces an all-zero result.
; Each entry is 64 bytes: bytes 0..31 go to r+0, bytes 32..63 go to r+64.
;
; The scan starts at entry 1, so 63 iterations cover entries 1..63. A count
; of 64 would also load entry 64, i.e. 64 bytes beyond a 64-entry table.
; The SSE2 routine sp_256_get_entry_64_4 uses the same 63-iteration scan.
_text SEGMENT READONLY PARA
sp_256_get_entry_64_avx2_4 PROC
        ; xmm6/xmm7 are used as scratch below; spill them first.
        sub rsp, 32
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        mov rax, 1
        movd xmm5, r8d
        ; From entry 1
        add rdx, 64
        movd xmm7, eax
        ; 63 candidates remain after skipping entry 0.
        mov rax, 63
        ; An all-zero index vector makes vpermd broadcast dword 0:
        ; ymm5 = idx in every lane, ymm7 = 1 in every lane.
        vpxor ymm6, ymm6, ymm6
        vpermd ymm5, ymm6, ymm5
        vpermd ymm7, ymm6, ymm7
        ; ymm0/ymm1 accumulate the selected entry.
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        ; ymm6 = running entry number, starting at 1.
        vmovdqa ymm6, ymm7
L_256_get_entry_64_avx2_4_start:
        ; ymm4 = all ones when this entry number equals idx, else zero.
        vpcmpeqd ymm4, ymm6, ymm5
        vpaddd ymm6, ymm6, ymm7
        vmovupd ymm2, YMMWORD PTR [rdx]
        vmovupd ymm3, YMMWORD PTR [rdx+32]
        add rdx, 64
        vpand ymm2, ymm2, ymm4
        vpand ymm3, ymm3, ymm4
        vpor ymm0, ymm0, ymm2
        vpor ymm1, ymm1, ymm3
        dec rax
        jnz L_256_get_entry_64_avx2_4_start
        ; First half of the entry to r+0, second half to r+64.
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        add rsp, 32
        ret
sp_256_get_entry_64_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; */
; Win64 register use, as read from the code below:
;   rcx = r, rdx = table, r8d = idx.
; Entries 1..64 are all loaded and masked, so the addresses touched do not
; depend on idx. Entry 0 is never read: the accumulators start at zero, so
; idx == 0 (or any idx outside 1..64) produces an all-zero result.
; Each entry is 64 bytes: bytes 0..31 go to r+0, bytes 32..63 go to r+64.
_text SEGMENT READONLY PARA
sp_256_get_entry_65_4 PROC
        ; xmm6-xmm11 are used as scratch below; spill them first.
        sub rsp, 96
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp], xmm6
        ; xmm9 = idx in every dword lane
        movd xmm9, r8d
        pshufd xmm9, xmm9, 0
        ; xmm11 = 1 in every dword lane (the per-entry increment)
        mov rax, 1
        movd xmm11, eax
        pshufd xmm11, xmm11, 0
        ; xmm10 = running entry number, starting at 1
        pxor xmm10, xmm10
        movdqa xmm10, xmm11
        ; xmm0..xmm3 accumulate the selected entry
        pxor xmm3, xmm3
        pxor xmm2, xmm2
        pxor xmm1, xmm1
        pxor xmm0, xmm0
        ; From entry 1
        add rdx, 64
        mov rax, 64
L_256_get_entry_65_4_next_entry:
        ; Load the whole candidate entry.
        movdqu xmm4, OWORD PTR [rdx]
        movdqu xmm5, OWORD PTR [rdx+16]
        movdqu xmm6, OWORD PTR [rdx+32]
        movdqu xmm7, OWORD PTR [rdx+48]
        ; xmm8 = all ones when this entry number equals idx, else zero.
        movdqa xmm8, xmm10
        pcmpeqd xmm8, xmm9
        paddd xmm10, xmm11
        ; Keep the entry only when the mask is set.
        pand xmm4, xmm8
        por xmm0, xmm4
        pand xmm5, xmm8
        por xmm1, xmm5
        pand xmm6, xmm8
        por xmm2, xmm6
        pand xmm7, xmm8
        por xmm3, xmm7
        add rdx, 64
        dec rax
        jnz L_256_get_entry_65_4_next_entry
        ; First half of the entry to r+0, second half to r+64.
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+64], xmm2
        movdqu OWORD PTR [rcx+80], xmm3
        ; Restore the spilled registers.
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm6, OWORD PTR [rsp]
        add rsp, 96
        ret
sp_256_get_entry_65_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * r Point to copy into.
; * table Table - start of the entries to access
; * idx Index of entry to retrieve.
; */
; Win64 register use, as read from the code below:
;   rcx = r, rdx = table, r8d = idx.
; Entries 1..64 are all loaded and masked, so the addresses touched do not
; depend on idx. Entry 0 is never read: the accumulators start at zero, so
; idx == 0 produces an all-zero result.
; Each entry is 64 bytes: bytes 0..31 go to r+0, bytes 32..63 go to r+64.
;
; The scan starts at entry 1, so 64 iterations cover entries 1..64. A count
; of 65 would also load entry 65, i.e. 64 bytes beyond a 65-entry table.
; The SSE2 routine sp_256_get_entry_65_4 uses the same 64-iteration scan.
_text SEGMENT READONLY PARA
sp_256_get_entry_65_avx2_4 PROC
        ; xmm6/xmm7 are used as scratch below; spill them first.
        sub rsp, 32
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        mov rax, 1
        movd xmm5, r8d
        ; From entry 1
        add rdx, 64
        movd xmm7, eax
        ; 64 candidates remain after skipping entry 0.
        mov rax, 64
        ; An all-zero index vector makes vpermd broadcast dword 0:
        ; ymm5 = idx in every lane, ymm7 = 1 in every lane.
        vpxor ymm6, ymm6, ymm6
        vpermd ymm5, ymm6, ymm5
        vpermd ymm7, ymm6, ymm7
        ; ymm0/ymm1 accumulate the selected entry.
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        ; ymm6 = running entry number, starting at 1.
        vmovdqa ymm6, ymm7
L_256_get_entry_65_avx2_4_start:
        ; ymm4 = all ones when this entry number equals idx, else zero.
        vpcmpeqd ymm4, ymm6, ymm5
        vpaddd ymm6, ymm6, ymm7
        vmovupd ymm2, YMMWORD PTR [rdx]
        vmovupd ymm3, YMMWORD PTR [rdx+32]
        add rdx, 64
        vpand ymm2, ymm2, ymm4
        vpand ymm3, ymm3, ymm4
        vpor ymm0, ymm0, ymm2
        vpor ymm1, ymm1, ymm3
        dec rax
        jnz L_256_get_entry_65_avx2_4_start
        ; First half of the entry to r+0, second half to r+64.
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        add rsp, 32
        ret
sp_256_get_entry_65_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
; *
; * a A single precision integer.
; */
; rcx = a (four 64-bit words, least significant first). The increment is
; done in place; a carry out of the top word is discarded.
_text SEGMENT READONLY PARA
sp_256_add_one_4 PROC
        add QWORD PTR [rcx], 1
        ; Ripple the carry through the remaining three words.
        adc QWORD PTR [rcx+8], 0
        adc QWORD PTR [rcx+16], 0
        adc QWORD PTR [rcx+24], 0
        ret
sp_256_add_one_4 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; */
; Win64 register use, as read from the code below:
;   rcx = r (write cursor), r8 = a, r9 = n. rdx (size) is not read: the
;   output length is fixed at 32 bytes.
;   r11 = read cursor, walking backwards from a + n
;   r12 = r + 32 (end of output), r13 = 0
; The array is consumed from its end (least significant bytes) in 64-byte
; and then 8-byte pieces; the 1..7 leading bytes left over are folded into
; one final word; the rest of r is zero filled.
_text SEGMENT READONLY PARA
sp_256_from_bin_bswap PROC
        push r12
        push r13
        lea r11, [r8+r9]
        lea r12, [rcx+32]
        xor r13, r13
L_256_from_bin_bswap_64_check:
        ; While at least 64 bytes remain, convert eight words at a time.
        cmp r9, 63
        jle L_256_from_bin_bswap_8_check
        sub r11, 64
        mov r10, QWORD PTR [r11+48]
        mov rax, QWORD PTR [r11+56]
        bswap r10
        bswap rax
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx], rax
        mov r10, QWORD PTR [r11+32]
        mov rax, QWORD PTR [r11+40]
        bswap r10
        bswap rax
        mov QWORD PTR [rcx+24], r10
        mov QWORD PTR [rcx+16], rax
        mov r10, QWORD PTR [r11+16]
        mov rax, QWORD PTR [r11+24]
        bswap r10
        bswap rax
        mov QWORD PTR [rcx+40], r10
        mov QWORD PTR [rcx+32], rax
        mov r10, QWORD PTR [r11]
        mov rax, QWORD PTR [r11+8]
        bswap r10
        bswap rax
        mov QWORD PTR [rcx+56], r10
        mov QWORD PTR [rcx+48], rax
        sub r9, 64
        add rcx, 64
        jmp L_256_from_bin_bswap_64_check
L_256_from_bin_bswap_8_check:
        ; While at least 8 bytes remain, convert one word at a time.
        cmp r9, 7
        jle L_256_from_bin_bswap_hi_check
        sub r11, 8
        mov rax, QWORD PTR [r11]
        bswap rax
        mov QWORD PTR [rcx], rax
        sub r9, 8
        add rcx, 8
        jmp L_256_from_bin_bswap_8_check
L_256_from_bin_bswap_hi_check:
        ; Nothing left over: go straight to the zero fill.
        cmp r9, r13
        je L_256_from_bin_bswap_zero_check
        mov rax, r13
        mov r10, r13
L_256_from_bin_bswap_hi_next:
        ; The leftover bytes sit at the start of a; shift them in, most
        ; significant first.
        shl r10, 8
        mov al, BYTE PTR [r8]
        add r10, rax
        inc r8
        dec r9
        jg L_256_from_bin_bswap_hi_next
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_256_from_bin_bswap_zero_check:
        ; Clear every word of r that was not written above.
        cmp rcx, r12
        jge L_256_from_bin_bswap_done
        mov QWORD PTR [rcx], r13
        add rcx, 8
        jmp L_256_from_bin_bswap_zero_check
L_256_from_bin_bswap_done:
        pop r13
        pop r12
        ret
sp_256_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; */
; Win64 register use, as read from the code below:
;   rcx = r (write cursor), r8 = a, r9 = n. rdx (size) is not read: the
;   output length is fixed at 32 bytes.
;   r11 = read cursor, walking backwards from a + n
;   r12 = r + 32 (end of output)
; The array is consumed from its end (least significant bytes) in 64-byte
; and then 8-byte pieces; the 1..7 leading bytes left over are folded into
; one final word; the rest of r is zero filled.
_text SEGMENT READONLY PARA
sp_256_from_bin_movbe PROC
        push r12
        lea r11, [r8+r9]
        lea r12, [rcx+32]
L_256_from_bin_movbe_64_check:
        ; While at least 64 bytes remain, convert eight words at a time.
        cmp r9, 63
        jle L_256_from_bin_movbe_8_check
        sub r11, 64
        movbe r10, QWORD PTR [r11+48]
        movbe rax, QWORD PTR [r11+56]
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx], rax
        movbe r10, QWORD PTR [r11+32]
        movbe rax, QWORD PTR [r11+40]
        mov QWORD PTR [rcx+24], r10
        mov QWORD PTR [rcx+16], rax
        movbe r10, QWORD PTR [r11+16]
        movbe rax, QWORD PTR [r11+24]
        mov QWORD PTR [rcx+40], r10
        mov QWORD PTR [rcx+32], rax
        movbe r10, QWORD PTR [r11]
        movbe rax, QWORD PTR [r11+8]
        mov QWORD PTR [rcx+56], r10
        mov QWORD PTR [rcx+48], rax
        sub r9, 64
        add rcx, 64
        jmp L_256_from_bin_movbe_64_check
L_256_from_bin_movbe_8_check:
        ; While at least 8 bytes remain, convert one word at a time.
        cmp r9, 7
        jle L_256_from_bin_movbe_hi_check
        sub r11, 8
        movbe rax, QWORD PTR [r11]
        mov QWORD PTR [rcx], rax
        sub r9, 8
        add rcx, 8
        jmp L_256_from_bin_movbe_8_check
L_256_from_bin_movbe_hi_check:
        ; Nothing left over: go straight to the zero fill.
        test r9, r9
        je L_256_from_bin_movbe_zero_check
        xor eax, eax
        xor r10d, r10d
L_256_from_bin_movbe_hi_next:
        ; The leftover bytes sit at the start of a; shift them in, most
        ; significant first.
        shl r10, 8
        mov al, BYTE PTR [r8]
        add r10, rax
        inc r8
        dec r9
        jg L_256_from_bin_movbe_hi_next
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_256_from_bin_movbe_zero_check:
        ; Clear every word of r that was not written above.
        cmp rcx, r12
        jge L_256_from_bin_movbe_done
        mov QWORD PTR [rcx], 0
        add rcx, 8
        jmp L_256_from_bin_movbe_zero_check
L_256_from_bin_movbe_done:
        pop r12
        ret
sp_256_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 32
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * a Byte array.
; */
; rcx = r (four 64-bit words, least significant first), rdx = a (32 bytes).
; The words are emitted most significant first, each byte-reversed.
_text SEGMENT READONLY PARA
sp_256_to_bin_bswap_4 PROC
        ; Words 3 and 2 become bytes 0..15.
        mov r8, QWORD PTR [rcx+16]
        mov rax, QWORD PTR [rcx+24]
        bswap r8
        bswap rax
        mov QWORD PTR [rdx+8], r8
        mov QWORD PTR [rdx], rax
        ; Words 1 and 0 become bytes 16..31.
        mov r8, QWORD PTR [rcx]
        mov rax, QWORD PTR [rcx+8]
        bswap r8
        bswap rax
        mov QWORD PTR [rdx+24], r8
        mov QWORD PTR [rdx+16], rax
        ret
sp_256_to_bin_bswap_4 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Store r into a byte array in big endian order.
; * Always writes exactly 32 bytes.
; * Relies on the optional movbe instruction.
; *
; * r A single precision integer.
; * a Byte array.
; */
; rcx = r (four 64-bit words, least significant first), rdx = a (32 bytes).
_text SEGMENT READONLY PARA
sp_256_to_bin_movbe_4 PROC
        ; Words 3 and 2 become bytes 0..15.
        movbe r8, QWORD PTR [rcx+16]
        movbe rax, QWORD PTR [rcx+24]
        mov QWORD PTR [rdx+8], r8
        mov QWORD PTR [rdx], rax
        ; Words 1 and 0 become bytes 16..31.
        movbe r8, QWORD PTR [rcx]
        movbe rax, QWORD PTR [rcx+8]
        mov QWORD PTR [rdx+24], r8
        mov QWORD PTR [rdx+16], rax
        ret
sp_256_to_bin_movbe_4 ENDP
_text ENDS
ENDIF
; /* Subtract b from a in place. (a -= b)
; *
; * a A single precision integer and result.
; * b A single precision integer.
; */
; rcx = a, rdx = b. rax is set from the final borrow: 0 when no borrow
; occurred, all ones when the subtraction wrapped.
_text SEGMENT READONLY PARA
sp_256_sub_in_place_4 PROC
        ; Read all of b before touching a.
        mov r11, QWORD PTR [rdx+24]
        mov r10, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rdx]
        sub QWORD PTR [rcx], r8
        sbb QWORD PTR [rcx+8], r9
        sbb QWORD PTR [rcx+16], r10
        sbb QWORD PTR [rcx+24], r11
        ; rax = 0 - borrow
        sbb rax, rax
        ret
sp_256_sub_in_place_4 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; rcx = r (five words written), rdx = a (four words), r8 = b.
; r9 keeps the pointer to a because mul overwrites rdx.
; r10/r11/r12 rotate as the running (low, high, overflow) words.
_text SEGMENT READONLY PARA
sp_256_mul_d_4 PROC
        push r12
        mov r9, rdx
        ; A[0] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9]
        mov r10, rax
        mov r11, rdx
        mov QWORD PTR [rcx], r10
        ; A[1] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+8]
        add r11, rax
        ; mov does not change flags, so the carry from add reaches the adc.
        mov QWORD PTR [rcx+8], r11
        adc r12, rdx
        adc r10, 0
        ; A[2] * B
        mov rax, r8
        xor r11, r11
        mul QWORD PTR [r9+16]
        add r12, rax
        mov QWORD PTR [rcx+16], r12
        adc r10, rdx
        adc r11, 0
        ; A[3] * B
        mov rax, r8
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        mov QWORD PTR [rcx+24], r10
        ; Top (fifth) word of the product.
        mov QWORD PTR [rcx+32], r11
        pop r12
        ret
sp_256_mul_d_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; rcx = r (five words written), rdx = a (four words), r8 = b.
; mulx takes its implicit multiplicand from rdx, so a moves to rax and b to
; rdx. mulx leaves flags alone; adcx uses only CF and adox only OF, giving
; two independent carry chains. r13 stays zero throughout.
_text SEGMENT READONLY PARA
sp_256_mul_d_avx2_4 PROC
        push r12
        push r13
        mov rax, rdx
        ; A[0] * B
        mov rdx, r8
        ; Also clears CF and OF before the adcx/adox chains start.
        xor r13, r13
        mulx r12, r11, QWORD PTR [rax]
        mov QWORD PTR [rcx], r11
        ; A[1] * B
        mulx r10, r9, QWORD PTR [rax+8]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+8], r12
        ; A[2] * B
        mulx r10, r9, QWORD PTR [rax+16]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+16], r11
        ; A[3] * B
        mulx r10, r9, QWORD PTR [rax+24]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        ; Fold the last CF into the top word.
        adcx r11, r13
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+32], r11
        pop r13
        pop r12
        ret
sp_256_mul_d_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide.
; * d0 The low order half of the number to divide.
; * div The divisor.
; * returns the result of the division.
; */
; rcx = d1, rdx = d0, r8 = div. div divides rdx:rax by its operand and
; leaves the quotient in rax, so d0 is moved to rax and d1 to rdx first.
; No check is made here for div == 0 or for a quotient that does not fit
; in 64 bits (d1 >= div); the div instruction faults in both cases.
_text SEGMENT READONLY PARA
div_256_word_asm_4 PROC
        mov r9, rdx
        mov rax, r9
        mov rdx, rcx
        div r8
        ret
div_256_word_asm_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; */
; rcx = r, rdx = a, r8 = b. The modulus is not a parameter: it is built
; into the reduction as immediates.
; NOTE(review): going by the routine name, the immediates are presumably the
; words of the P-256 group order (least significant first) and its
; Montgomery constant - confirm against the C-side definitions.
;
; Phase 1 builds the 512-bit product in r8..r15 (least significant word in
; r8). mulx leaves flags alone; adcx uses only CF and adox only OF, so two
; carry chains run interleaved. Instruction order matters throughout.
; Phase 2 performs four reduction rounds, one per low word.
; Phase 3 subtracts the modulus once if the reduction carried out.
_text SEGMENT READONLY PARA
sp_256_mont_mul_order_avx2_4 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        ; rbp = b, rax = a; rdx becomes the implicit mulx multiplicand.
        mov rbp, r8
        mov rax, rdx
        mov rdx, QWORD PTR [rax]
        ; B[1] is kept in r14 for the first three rows.
        mov r14, QWORD PTR [rbp+8]
        ; A[0] * B[0]
        mulx r9, r8, QWORD PTR [rbp]
        ; rbx = 0; also clears CF and OF.
        xor rbx, rbx
        ; A[0] * B[1]
        mulx r10, rdi, r14
        adcx r9, rdi
        ; A[0] * B[2]
        mulx r11, rdi, QWORD PTR [rbp+16]
        adcx r10, rdi
        ; A[0] * B[3]
        mulx r12, rdi, QWORD PTR [rbp+24]
        adcx r11, rdi
        mov rdx, QWORD PTR [rax+8]
        adcx r12, rbx
        ; A[1] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r9, rdi
        ; A[1] * B[1]
        mulx r15, rdi, r14
        adox r10, rsi
        adcx r10, rdi
        ; A[1] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r11, r15
        adcx r11, rdi
        ; A[1] * B[3]
        mulx r13, rdi, QWORD PTR [rbp+24]
        adox r12, rsi
        adcx r12, rdi
        adox r13, rbx
        mov rdx, QWORD PTR [rax+16]
        adcx r13, rbx
        ; A[2] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r10, rdi
        ; A[2] * B[1]
        mulx r15, rdi, r14
        adox r11, rsi
        adcx r11, rdi
        ; A[2] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r12, r15
        adcx r12, rdi
        ; A[2] * B[3]
        ; r14 is reused as an accumulator from here; B[1] is reloaded below.
        mulx r14, rdi, QWORD PTR [rbp+24]
        adox r13, rsi
        adcx r13, rdi
        adox r14, rbx
        mov rdx, QWORD PTR [rax+24]
        adcx r14, rbx
        ; A[3] * B[0]
        mulx rsi, rdi, QWORD PTR [rbp]
        xor rbx, rbx
        adcx r11, rdi
        ; A[3] * B[1]
        mulx r15, rdi, QWORD PTR [rbp+8]
        adox r12, rsi
        adcx r12, rdi
        ; A[3] * B[2]
        mulx rsi, rdi, QWORD PTR [rbp+16]
        adox r13, r15
        adcx r13, rdi
        ; A[3] * B[3]
        mulx r15, rdi, QWORD PTR [rbp+24]
        adox r14, rsi
        adcx r14, rdi
        adox r15, rbx
        adcx r15, rbx
        ; Start Reduction
        ; rbx = per-round multiplier constant; each round computes
        ; mu = (low word) * rbx and adds mu * modulus to the accumulator.
        mov rbx, 14758798090332847183
        ; A[0]
        mov rdx, rbx
        imul rdx, r8
        mov rdi, 17562291160714782033
        ; rbp = 0; also clears CF and OF for this round.
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r8, rsi
        adox r9, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r9, rsi
        adox r10, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        adcx r11, rsi
        adox r12, rax
        adcx r12, rbp
        ; r8 is free now; it collects this round's carry out (OF + CF).
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[1]
        mov rdx, rbx
        imul rdx, r9
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r9, rsi
        adox r10, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        adcx r12, rsi
        adox r13, rax
        ; Add in the carry saved by the previous round.
        adcx r13, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[2]
        mov rdx, rbx
        imul rdx, r10
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r12, rsi
        adox r13, rax
        mulx rax, rsi, rdi
        adcx r13, rsi
        adox r14, rax
        adcx r14, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[3]
        mov rdx, rbx
        imul rdx, r11
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r12, rsi
        adox r13, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r13, rsi
        adox r14, rax
        mulx rax, rsi, rdi
        adcx r14, rsi
        adox r15, rax
        adcx r15, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; Turn the final carry into a mask (negated carry) and subtract
        ; (modulus AND mask) from the result in r12..r15.
        neg r8
        mov rdi, 17562291160714782033
        mov rbx, 13611842547513532036
        and rdi, r8
        mov rbp, 18446744069414584320
        and rbx, r8
        and rbp, r8
        sub r12, rdi
        sbb r13, rbx
        mov QWORD PTR [rcx], r12
        ; The third modulus word is all ones, so the mask itself is used.
        sbb r14, r8
        mov QWORD PTR [rcx+8], r13
        sbb r15, rbp
        mov QWORD PTR [rcx+16], r14
        mov QWORD PTR [rcx+24], r15
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_256_mont_mul_order_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; */
; rcx = r, rdx = a. The modulus is not a parameter: it is built into the
; reduction as immediates.
; NOTE(review): going by the routine name, the immediates are presumably the
; words of the P-256 group order (least significant first) and its
; Montgomery constant - confirm against the C-side definitions.
;
; Phase 1 computes the off-diagonal products A[i]*A[j] (i < j) once,
; doubles them, and adds the diagonal squares; the 512-bit result ends up
; in r8..r15 (least significant word in r8).
; Phase 2 performs four reduction rounds, one per low word.
; Phase 3 subtracts the modulus once if the reduction carried out.
; mulx leaves flags alone; adcx uses only CF and adox only OF, so
; instruction order matters throughout.
_text SEGMENT READONLY PARA
sp_256_mont_sqr_order_avx2_4 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov rax, rdx
        ; Clears CF and OF for the chains below.
        xor r8, r8
        mov rdx, QWORD PTR [rax]
        mov rsi, QWORD PTR [rax+8]
        mov rbx, QWORD PTR [rax+16]
        mov r15, QWORD PTR [rax+24]
        ; A[0] * A[1]
        mulx r10, r9, rsi
        ; A[0] * A[2]
        mulx r11, r8, rbx
        adox r10, r8
        ; A[0] * A[3]
        mulx r12, r8, r15
        mov rdx, rsi
        adox r11, r8
        ; A[1] * A[2]
        mulx rdi, r8, rbx
        mov rdx, r15
        adcx r11, r8
        ; A[1] * A[3]
        mulx r13, r8, rsi
        ; mov (not xor) so that CF and OF stay live.
        mov r15, 0
        adox r12, rdi
        adcx r12, r8
        ; A[2] * A[3]
        mulx r14, r8, rbx
        adox r13, r15
        adcx r13, r8
        adox r14, r15
        adcx r14, r15
        ; Double with Carry Flag
        ; The CF chain doubles the off-diagonal words while the OF chain
        ; adds in the squares.
        xor r15, r15
        ; A[0] * A[0]
        mov rdx, QWORD PTR [rax]
        mulx rdi, r8, rdx
        adcx r9, r9
        adcx r10, r10
        adox r9, rdi
        ; A[1] * A[1]
        mov rdx, QWORD PTR [rax+8]
        mulx rbx, rsi, rdx
        adcx r11, r11
        adox r10, rsi
        ; A[2] * A[2]
        mov rdx, QWORD PTR [rax+16]
        mulx rsi, rdi, rdx
        adcx r12, r12
        adox r11, rbx
        adcx r13, r13
        adox r12, rdi
        adcx r14, r14
        ; A[3] * A[3]
        mov rdx, QWORD PTR [rax+24]
        mulx rbx, rdi, rdx
        adox r13, rsi
        adcx r15, r15
        adox r14, rdi
        adox r15, rbx
        ; Start Reduction
        ; rbx = per-round multiplier constant; each round computes
        ; mu = (low word) * rbx and adds mu * modulus to the accumulator.
        mov rbx, 14758798090332847183
        ; A[0]
        mov rdx, rbx
        imul rdx, r8
        mov rdi, 17562291160714782033
        ; rbp = 0; also clears CF and OF for this round.
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r8, rsi
        adox r9, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r9, rsi
        adox r10, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        adcx r11, rsi
        adox r12, rax
        adcx r12, rbp
        ; r8 is free now; it collects this round's carry out (OF + CF).
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[1]
        mov rdx, rbx
        imul rdx, r9
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r9, rsi
        adox r10, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        adcx r12, rsi
        adox r13, rax
        ; Add in the carry saved by the previous round.
        adcx r13, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[2]
        mov rdx, rbx
        imul rdx, r10
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r10, rsi
        adox r11, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r12, rsi
        adox r13, rax
        mulx rax, rsi, rdi
        adcx r13, rsi
        adox r14, rax
        adcx r14, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; A[3]
        mov rdx, rbx
        imul rdx, r11
        mov rdi, 17562291160714782033
        xor rbp, rbp
        mulx rax, rsi, rdi
        mov rdi, 13611842547513532036
        adcx r11, rsi
        adox r12, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744073709551615
        adcx r12, rsi
        adox r13, rax
        mulx rax, rsi, rdi
        mov rdi, 18446744069414584320
        adcx r13, rsi
        adox r14, rax
        mulx rax, rsi, rdi
        adcx r14, rsi
        adox r15, rax
        adcx r15, r8
        mov r8, rbp
        ; carry
        adox r8, rbp
        adcx r8, rbp
        ; Turn the final carry into a mask (negated carry) and subtract
        ; (modulus AND mask) from the result in r12..r15.
        neg r8
        mov rdi, 17562291160714782033
        mov rbx, 13611842547513532036
        and rdi, r8
        mov rbp, 18446744069414584320
        and rbx, r8
        and rbp, r8
        sub r12, rdi
        sbb r13, rbx
        mov QWORD PTR [rcx], r12
        ; The third modulus word is all ones, so the mask itself is used.
        sbb r14, r8
        mov QWORD PTR [rcx+8], r13
        sbb r15, rbp
        mov QWORD PTR [rcx+16], r14
        mov QWORD PTR [rcx+24], r15
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_256_mont_sqr_order_avx2_4 ENDP
_text ENDS
ENDIF
; /* Non-constant time modular inversion.
; *
; * @param [out] r Resulting number.
; * @param [in] a Number to invert.
; * @param [in] m Modulus.
; * @return MP_OKAY on success.
; */
; rcx = r, rdx = a, r8 = m.
;
; Pass 1 runs a binary GCD style loop on u = m (r9..r12) and v = a
; (r13,r14,r15,rdi) and records every step as one byte in a stack buffer
; indexed by rsi:
;   0 = u halved, 1 = v halved, 2 = u -= v then halved, 3 = v -= u then halved
; It stops when u or v reaches 1; al remembers which (1 = u, 0 = v).
; Pass 2 replays the recorded steps on b (r9..r12, starts as m) and
; d (r13,r14,r15,rdi, starts as 1), reducing mod m, and stores b or d.
;
; NOTE(review): nothing here bounds rsi against the 513-byte buffer, and no
; input check (e.g. a == 0) is visible - confirm callers guarantee valid
; input.
; NOTE(review): only al is written before ret (as the b/d selector); rax is
; never set to a status value although the header documents a return code -
; confirm callers ignore the return value.
_text SEGMENT READONLY PARA
sp_256_mod_inv_4 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        ; Step log lives at [rsp .. rsp+512].
        sub rsp, 513
        ; u = m
        mov r9, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        mov r11, QWORD PTR [r8+16]
        mov r12, QWORD PTR [r8+24]
        ; v = a (rdx is free for scratch use after these loads)
        mov r13, QWORD PTR [rdx]
        mov r14, QWORD PTR [rdx+8]
        mov r15, QWORD PTR [rdx+16]
        mov rdi, QWORD PTR [rdx+24]
        ; rsi = number of steps logged
        mov rsi, 0
        test r13b, 1
        jnz L_256_mod_inv_4_v_even_end
L_256_mod_inv_4_v_even_start:
        ; v is even: halve it and log a 1.
        shrd r13, r14, 1
        shrd r14, r15, 1
        shrd r15, rdi, 1
        shr rdi, 1
        mov BYTE PTR [rsp+rsi], 1
        inc rsi
        test r13b, 1
        jz L_256_mod_inv_4_v_even_start
L_256_mod_inv_4_v_even_end:
L_256_mod_inv_4_uv_start:
        ; Unsigned compare of u and v from the top word down.
        cmp r12, rdi
        jb L_256_mod_inv_4_uv_v
        ja L_256_mod_inv_4_uv_u
        cmp r11, r15
        jb L_256_mod_inv_4_uv_v
        ja L_256_mod_inv_4_uv_u
        cmp r10, r14
        jb L_256_mod_inv_4_uv_v
        ja L_256_mod_inv_4_uv_u
        cmp r9, r13
        jb L_256_mod_inv_4_uv_v
L_256_mod_inv_4_uv_u:
        ; u >= v: u = (u - v) / 2, logged as 2.
        mov BYTE PTR [rsp+rsi], 2
        inc rsi
        sub r9, r13
        sbb r10, r14
        sbb r11, r15
        sbb r12, rdi
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        shr r12, 1
        test r9b, 1
        jnz L_256_mod_inv_4_usubv_even_end
L_256_mod_inv_4_usubv_even_start:
        ; Keep halving u while it is even, logging a 0 each time.
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        shr r12, 1
        mov BYTE PTR [rsp+rsi], 0
        inc rsi
        test r9b, 1
        jz L_256_mod_inv_4_usubv_even_start
L_256_mod_inv_4_usubv_even_end:
        ; Done when u == 1.
        cmp r9, 1
        jne L_256_mod_inv_4_uv_start
        mov rdx, r10
        or rdx, r11
        jne L_256_mod_inv_4_uv_start
        or rdx, r12
        jne L_256_mod_inv_4_uv_start
        ; u reached 1: the answer will be b.
        mov al, 1
        jmp L_256_mod_inv_4_uv_end
L_256_mod_inv_4_uv_v:
        ; u < v: v = (v - u) / 2, logged as 3.
        mov BYTE PTR [rsp+rsi], 3
        inc rsi
        sub r13, r9
        sbb r14, r10
        sbb r15, r11
        sbb rdi, r12
        shrd r13, r14, 1
        shrd r14, r15, 1
        shrd r15, rdi, 1
        shr rdi, 1
        test r13b, 1
        jnz L_256_mod_inv_4_vsubu_even_end
L_256_mod_inv_4_vsubu_even_start:
        ; Keep halving v while it is even, logging a 1 each time.
        shrd r13, r14, 1
        shrd r14, r15, 1
        shrd r15, rdi, 1
        shr rdi, 1
        mov BYTE PTR [rsp+rsi], 1
        inc rsi
        test r13b, 1
        jz L_256_mod_inv_4_vsubu_even_start
L_256_mod_inv_4_vsubu_even_end:
        ; Done when v == 1.
        cmp r13, 1
        jne L_256_mod_inv_4_uv_start
        mov rdx, r14
        or rdx, r15
        jne L_256_mod_inv_4_uv_start
        or rdx, rdi
        jne L_256_mod_inv_4_uv_start
        ; v reached 1: the answer will be d.
        mov al, 0
L_256_mod_inv_4_uv_end:
        ; Pass 2: b = m, d = 1.
        mov r9, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        mov r11, QWORD PTR [r8+16]
        mov r12, QWORD PTR [r8+24]
        mov r13, 1
        xor r14, r14
        xor r15, r15
        xor rdi, rdi
        ; Terminate the log with 7 (any value above 3 ends the replay).
        mov BYTE PTR [rsp+rsi], 7
        mov dl, BYTE PTR [rsp]
        mov rsi, 1
        ; Dispatch on the step code: 0 -> div2_b, 1 -> div2_d,
        ; 2 -> b_sub_d, 3 -> d_sub_b, otherwise finished.
        cmp dl, 1
        je L_256_mod_inv_4_op_div2_d
        jl L_256_mod_inv_4_op_div2_b
        cmp dl, 3
        je L_256_mod_inv_4_op_d_sub_b
        jl L_256_mod_inv_4_op_b_sub_d
        jmp L_256_mod_inv_4_op_end
L_256_mod_inv_4_op_b_sub_d:
        ; b -= d; add m back if that borrowed. Falls through to halve b.
        sub r9, r13
        sbb r10, r14
        sbb r11, r15
        sbb r12, rdi
        jnc L_256_mod_inv_4_op_div2_b
        add r9, QWORD PTR [r8]
        adc r10, QWORD PTR [r8+8]
        adc r11, QWORD PTR [r8+16]
        adc r12, QWORD PTR [r8+24]
L_256_mod_inv_4_op_div2_b:
        ; b /= 2 mod m: if b is odd add m first; rdx catches the carry.
        test r9b, 1
        ; mov (not xor) keeps ZF from the test for the jz below.
        mov rdx, 0
        jz L_256_mod_inv_4_op_div2_b_mod
        add r9, QWORD PTR [r8]
        adc r10, QWORD PTR [r8+8]
        adc r11, QWORD PTR [r8+16]
        adc r12, QWORD PTR [r8+24]
        adc rdx, 0
L_256_mod_inv_4_op_div2_b_mod:
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        shrd r12, rdx, 1
        ; Fetch and dispatch the next step.
        mov dl, BYTE PTR [rsp+rsi]
        inc rsi
        cmp dl, 1
        je L_256_mod_inv_4_op_div2_d
        jl L_256_mod_inv_4_op_div2_b
        cmp dl, 3
        je L_256_mod_inv_4_op_d_sub_b
        jl L_256_mod_inv_4_op_b_sub_d
        jmp L_256_mod_inv_4_op_end
L_256_mod_inv_4_op_d_sub_b:
        ; d -= b; add m back if that borrowed. Falls through to halve d.
        sub r13, r9
        sbb r14, r10
        sbb r15, r11
        sbb rdi, r12
        jnc L_256_mod_inv_4_op_div2_d
        add r13, QWORD PTR [r8]
        adc r14, QWORD PTR [r8+8]
        adc r15, QWORD PTR [r8+16]
        adc rdi, QWORD PTR [r8+24]
L_256_mod_inv_4_op_div2_d:
        ; d /= 2 mod m: if d is odd add m first; rdx catches the carry.
        test r13b, 1
        ; mov (not xor) keeps ZF from the test for the jz below.
        mov rdx, 0
        jz L_256_mod_inv_4_op_div2_d_mod
        add r13, QWORD PTR [r8]
        adc r14, QWORD PTR [r8+8]
        adc r15, QWORD PTR [r8+16]
        adc rdi, QWORD PTR [r8+24]
        adc rdx, 0
L_256_mod_inv_4_op_div2_d_mod:
        shrd r13, r14, 1
        shrd r14, r15, 1
        shrd r15, rdi, 1
        shrd rdi, rdx, 1
        ; Fetch and dispatch the next step; falls into op_end when done.
        mov dl, BYTE PTR [rsp+rsi]
        inc rsi
        cmp dl, 1
        je L_256_mod_inv_4_op_div2_d
        jl L_256_mod_inv_4_op_div2_b
        cmp dl, 3
        je L_256_mod_inv_4_op_d_sub_b
        jl L_256_mod_inv_4_op_b_sub_d
L_256_mod_inv_4_op_end:
        ; al == 1: result is b, otherwise d.
        cmp al, 1
        jne L_256_mod_inv_4_store_d
        mov QWORD PTR [rcx], r9
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        jmp L_256_mod_inv_4_store_end
L_256_mod_inv_4_store_d:
        mov QWORD PTR [rcx], r13
        mov QWORD PTR [rcx+8], r14
        mov QWORD PTR [rcx+16], r15
        mov QWORD PTR [rcx+24], rdi
L_256_mod_inv_4_store_end:
        add rsp, 513
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_256_mod_inv_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; Constant vectors loaded by sp_256_mod_inv_avx2_4. Each table is followed
; by a QWORD holding its address; the routine loads that pointer first.
; Two rows of eight dwords, added to the two limb registers when the modulus
; must be added in (26-bit limbs; row 0 goes with the even-numbered limbs,
; row 1 with the odd-numbered ones).
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0
ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order
_DATA ENDS
; The value 1: initial d, and the vptest mask for "is the low bit set".
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_one QWORD 1, 0, 0, 0
ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one
_DATA ENDS
; Bit 0 of every dword lane.
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1
ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one
_DATA ENDS
; Bit 0 of dword lanes 1..4 only.
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111
_DATA ENDS
; vpermd index vector: lane i takes lane i+1 (lane 7 keeps itself).
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7
ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword
_DATA ENDS
; Sign bit of dword lane 4: vptest mask for "top limb is negative".
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0
ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg
_DATA ENDS
; vpermd index vector: lanes 1..4 take lanes 0..3.
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7
ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword
_DATA ENDS
; Low 26 bits of dword lanes 0..4.
_DATA SEGMENT
ALIGN 16
L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26
_DATA ENDS
; /* Non-constant time modular inversion.
; *
; * @param [out] r Resulting number.
; * @param [in] a Number to invert.
; * @param [in] m Modulus.
; * @return MP_OKAY on success.
; */
; rcx = r, rdx = a, r8 = m.
;
; u = m lives in rax,r9,r10,r11 and v = a in r12..r15 as plain 64-bit words.
; The two coefficients are updated in step with u and v, held as signed
; 26-bit limbs in dword lanes: b in ymm0 (even limbs) / ymm1 (odd limbs),
; d in ymm2 / ymm3. Carries between limbs are only resolved at the end.
; The modulus added to the coefficients comes from the constant table, not
; from the m argument.
; NOTE(review): that table presumably encodes the P-256 group order, so the
; routine would only be valid when m is that order - confirm with callers.
; NOTE(review): rax is not set to a status value before ret although the
; header documents a return code - confirm callers ignore the return value.
; NOTE(review): ymm13 and ymm14 are loaded but no later use is visible in
; this routine.
_text SEGMENT READONLY PARA
sp_256_mod_inv_avx2_4 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        ; Spill xmm6-xmm14, which are used below.
        sub rsp, 144
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        ; u = m
        mov rax, QWORD PTR [r8]
        mov r9, QWORD PTR [r8+8]
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; v = a
        mov r12, QWORD PTR [rdx]
        mov r13, QWORD PTR [rdx+8]
        mov r14, QWORD PTR [rdx+16]
        mov r15, QWORD PTR [rdx+24]
        ; Load the constant vectors through their pointer variables:
        ; ymm6/ymm7 = modulus limbs (even/odd), ymm8 = 1, ymm9/ymm10 = low
        ; bit masks, ymm11 = lane shuffle, ymm12 = sign bit of lane 4.
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_order]
        vmovupd ymm6, YMMWORD PTR [rbx]
        vmovupd ymm7, YMMWORD PTR [rbx+32]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_one]
        vmovupd ymm8, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_mask01111]
        vmovupd ymm9, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_all_one]
        vmovupd ymm10, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_down_one_dword]
        vmovupd ymm11, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_neg]
        vmovupd ymm12, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_up_one_dword]
        vmovupd ymm13, YMMWORD PTR [rbx]
        mov rbx, QWORD PTR [ptr_L_sp256_mod_inv_avx2_4_mask26]
        vmovupd ymm14, YMMWORD PTR [rbx]
        ; b = 0, d = 1
        vpxor xmm0, xmm0, xmm0
        vpxor xmm1, xmm1, xmm1
        vmovdqu ymm2, ymm8
        vpxor xmm3, xmm3, xmm3
        test r12b, 1
        jnz L_256_mod_inv_avx2_4_v_even_end
L_256_mod_inv_avx2_4_v_even_start:
        ; v is even: halve v, and halve d modulo the modulus.
        shrd r12, r13, 1
        shrd r13, r14, 1
        shrd r14, r15, 1
        shr r15, 1
        ; If d is odd, add the modulus so the halving is exact.
        vptest ymm2, ymm8
        jz L_256_mod_inv_avx2_4_v_even_shr1
        vpaddd ymm2, ymm2, ymm6
        vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_v_even_shr1:
        ; Shift every limb right by one, moving each limb's low bit into
        ; bit 25 of the limb below it (even and odd limbs alternate between
        ; the two registers).
        vpand ymm4, ymm2, ymm9
        vpand ymm5, ymm3, ymm10
        vpermd ymm4, ymm11, ymm4
        vpsrad ymm2, ymm2, 1
        vpsrad ymm3, ymm3, 1
        vpslld ymm5, ymm5, 25
        vpslld xmm4, xmm4, 25
        vpaddd ymm2, ymm2, ymm5
        vpaddd ymm3, ymm3, ymm4
        test r12b, 1
        jz L_256_mod_inv_avx2_4_v_even_start
L_256_mod_inv_avx2_4_v_even_end:
L_256_mod_inv_avx2_4_uv_start:
        ; Unsigned compare of u and v from the top word down.
        cmp r11, r15
        jb L_256_mod_inv_avx2_4_uv_v
        ja L_256_mod_inv_avx2_4_uv_u
        cmp r10, r14
        jb L_256_mod_inv_avx2_4_uv_v
        ja L_256_mod_inv_avx2_4_uv_u
        cmp r9, r13
        jb L_256_mod_inv_avx2_4_uv_v
        ja L_256_mod_inv_avx2_4_uv_u
        cmp rax, r12
        jb L_256_mod_inv_avx2_4_uv_v
L_256_mod_inv_avx2_4_uv_u:
        ; u >= v: u -= v and b -= d. The vector subtracts do not touch
        ; flags, so they can sit inside the sub/sbb chain.
        sub rax, r12
        sbb r9, r13
        vpsubd ymm0, ymm0, ymm2
        sbb r10, r14
        vpsubd ymm1, ymm1, ymm3
        sbb r11, r15
        ; If the tested limb of b went negative, add the modulus back.
        vptest ymm1, ymm12
        jz L_256_mod_inv_avx2_4_usubv_done_neg
        vpaddd ymm0, ymm0, ymm6
        vpaddd ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_done_neg:
L_256_mod_inv_avx2_4_usubv_shr1:
        ; Halve u, and halve b modulo the modulus, until u is odd.
        shrd rax, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shr r11, 1
        vptest ymm0, ymm8
        jz L_256_mod_inv_avx2_4_usubv_sub_shr1
        vpaddd ymm0, ymm0, ymm6
        vpaddd ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_sub_shr1:
        vpand ymm4, ymm0, ymm9
        vpand ymm5, ymm1, ymm10
        vpermd ymm4, ymm11, ymm4
        vpsrad ymm0, ymm0, 1
        vpsrad ymm1, ymm1, 1
        vpslld ymm5, ymm5, 25
        vpslld xmm4, xmm4, 25
        vpaddd ymm0, ymm0, ymm5
        vpaddd ymm1, ymm1, ymm4
        test al, 1
        jz L_256_mod_inv_avx2_4_usubv_shr1
        ; Done when u == 1.
        cmp rax, 1
        jne L_256_mod_inv_avx2_4_uv_start
        mov rdx, r9
        or rdx, r10
        jne L_256_mod_inv_avx2_4_uv_start
        or rdx, r11
        jne L_256_mod_inv_avx2_4_uv_start
        ; Result is b: pull the ten limbs into general registers.
        ; Limb order: eax, r9d, r10d, r11d, r12d, r13d, r14d, r15d, edi, esi.
        vpextrd eax, xmm0, 0
        vpextrd r10d, xmm0, 1
        vpextrd r12d, xmm0, 2
        vpextrd r14d, xmm0, 3
        vpextrd r9d, xmm1, 0
        vpextrd r11d, xmm1, 1
        vpextrd r13d, xmm1, 2
        vpextrd r15d, xmm1, 3
        vextracti128 xmm0, ymm0, 1
        vextracti128 xmm1, ymm1, 1
        vpextrd edi, xmm0, 0
        vpextrd esi, xmm1, 0
        jmp L_256_mod_inv_avx2_4_store_done
L_256_mod_inv_avx2_4_uv_v:
        ; u < v: v -= u and d -= b.
        sub r12, rax
        sbb r13, r9
        vpsubd ymm2, ymm2, ymm0
        sbb r14, r10
        vpsubd ymm3, ymm3, ymm1
        sbb r15, r11
        ; If the tested limb of d went negative, add the modulus back.
        vptest ymm3, ymm12
        jz L_256_mod_inv_avx2_4_vsubu_done_neg
        vpaddd ymm2, ymm2, ymm6
        vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_done_neg:
L_256_mod_inv_avx2_4_vsubu_shr1:
        ; Halve v, and halve d modulo the modulus, until v is odd.
        shrd r12, r13, 1
        shrd r13, r14, 1
        shrd r14, r15, 1
        shr r15, 1
        vptest ymm2, ymm8
        jz L_256_mod_inv_avx2_4_vsubu_sub_shr1
        vpaddd ymm2, ymm2, ymm6
        vpaddd ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_sub_shr1:
        vpand ymm4, ymm2, ymm9
        vpand ymm5, ymm3, ymm10
        vpermd ymm4, ymm11, ymm4
        vpsrad ymm2, ymm2, 1
        vpsrad ymm3, ymm3, 1
        vpslld ymm5, ymm5, 25
        vpslld xmm4, xmm4, 25
        vpaddd ymm2, ymm2, ymm5
        vpaddd ymm3, ymm3, ymm4
        test r12b, 1
        jz L_256_mod_inv_avx2_4_vsubu_shr1
        ; Done when v == 1.
        cmp r12, 1
        jne L_256_mod_inv_avx2_4_uv_start
        mov rdx, r13
        or rdx, r14
        jne L_256_mod_inv_avx2_4_uv_start
        or rdx, r15
        jne L_256_mod_inv_avx2_4_uv_start
        ; Result is d: pull the ten limbs into general registers.
        vpextrd eax, xmm2, 0
        vpextrd r10d, xmm2, 1
        vpextrd r12d, xmm2, 2
        vpextrd r14d, xmm2, 3
        vpextrd r9d, xmm3, 0
        vpextrd r11d, xmm3, 1
        vpextrd r13d, xmm3, 2
        vpextrd r15d, xmm3, 3
        vextracti128 xmm2, ymm2, 1
        vextracti128 xmm3, ymm3, 1
        vpextrd edi, xmm2, 0
        vpextrd esi, xmm3, 0
L_256_mod_inv_avx2_4_store_done:
        ; Propagate signed carries so that each limb keeps only 26 bits
        ; (the last limb, esi, keeps whatever is left, including the sign).
        mov edx, eax
        and eax, 67108863
        sar edx, 26
        add r9d, edx
        mov edx, r9d
        and r9d, 67108863
        sar edx, 26
        add r10d, edx
        mov edx, r10d
        and r10d, 67108863
        sar edx, 26
        add r11d, edx
        mov edx, r11d
        and r11d, 67108863
        sar edx, 26
        add r12d, edx
        mov edx, r12d
        and r12d, 67108863
        sar edx, 26
        add r13d, edx
        mov edx, r13d
        and r13d, 67108863
        sar edx, 26
        add r14d, edx
        mov edx, r14d
        and r14d, 67108863
        sar edx, 26
        add r15d, edx
        mov edx, r15d
        and r15d, 67108863
        sar edx, 26
        add edi, edx
        mov edx, edi
        and edi, 67108863
        sar edx, 26
        add esi, edx
        ; Merge limb pairs into five 52-bit limbs: rax, r10, r12, r14, rdi.
        movsxd r9, r9d
        movsxd r11, r11d
        movsxd r13, r13d
        movsxd r15, r15d
        movsxd rsi, esi
        shl r9, 26
        shl r11, 26
        shl r13, 26
        shl r15, 26
        shl rsi, 26
        ; movsxd does not change flags, so it can sit inside the adc chain.
        movsxd rax, eax
        add rax, r9
        movsxd r10, r10d
        adc r10, r11
        movsxd r12, r12d
        adc r12, r13
        movsxd r14, r14d
        adc r14, r15
        movsxd rdi, edi
        adc rdi, rsi
        ; Negative top limb: add the modulus (as 52-bit limbs) and
        ; renormalise.
        jge L_256_mod_inv_avx2_4_3_no_add_order
        mov r9, 2756213597218129
        mov r11, 3054930678533947
        mov r13, 4503599622973178
        mov r15, 68719476735
        mov rsi, 281474976645120
        add rax, r9
        add r10, r11
        add r12, r13
        add r14, r15
        add rdi, rsi
        ; rdx = 2^52 - 1
        mov rdx, 4503599627370495
        mov r9, rax
        and rax, rdx
        sar r9, 52
        add r10, r9
        mov r11, r10
        and r10, rdx
        sar r11, 52
        add r12, r11
        mov r13, r12
        and r12, rdx
        sar r13, 52
        add r14, r13
        mov r15, r14
        and r14, rdx
        sar r15, 52
        add rdi, r15
L_256_mod_inv_avx2_4_3_no_add_order:
        ; Repack the five 52-bit limbs into four 64-bit words.
        mov r9, r10
        mov r11, r12
        mov r13, r14
        shl r9, 52
        sar r10, 12
        shl r11, 40
        sar r12, 24
        shl r13, 28
        sar r14, 36
        shl rdi, 16
        add rax, r9
        adc r10, r11
        adc r12, r13
        adc r14, rdi
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r12
        mov QWORD PTR [rcx+24], r14
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        add rsp, 144
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_256_mod_inv_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_384
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
; rcx = r (twelve words written), rdx = a, r8 = b (six words each).
; r9 keeps the pointer to a because mul overwrites rdx.
; The product is built column by column; r10/r11/r12 rotate as the
; three-word column accumulator.
; The low six result words are staged on the stack and copied to r only
; after the last read of a and b; the high six words go straight to r+48.
_text SEGMENT READONLY PARA
sp_384_mul_6 PROC
        push r12
        mov r9, rdx
        sub rsp, 48
        ; A[0] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9]
        xor r12, r12
        mov QWORD PTR [rsp], rax
        mov r11, rdx
        ; A[0] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[1] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+8]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rsp+24], r10
        ; A[0] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[2] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+16]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[4] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+32]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+32], r11
        ; A[0] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[3] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+24]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[4] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+32]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+40], r12
        ; A[1] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+8]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[4] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+32]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[5] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+40]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rcx+48], r10
        ; A[2] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+16]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[4] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+32]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[5] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+40]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rcx+56], r11
        ; A[3] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+24]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[4] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+32]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+64], r12
        ; A[4] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+32]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[5] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+40]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rcx+72], r10
        ; A[5] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+40]
        add r11, rax
        adc r12, rdx
        mov QWORD PTR [rcx+80], r11
        mov QWORD PTR [rcx+88], r12
        ; Copy the staged low six words into r.
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r10, QWORD PTR [rsp+16]
        mov r11, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [rsp+32]
        mov rdx, QWORD PTR [rsp+40]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], rdx
        add rsp, 48
        pop r12
        ret
sp_384_mul_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
;  */
; Register use in the code below (Win64 argument registers):
;   rcx = r, rdx = a (moved to rax because mulx takes its implicit
;   multiplicand from rdx), r8 = b.
; One row of partial products A[i] * B[0..5] is accumulated per step on two
; independent carry chains: adcx uses only CF, adox uses only OF. rbx is kept
; at zero and "xor rbx, rbx" clears both flags ahead of rows 0 to 4; row 5
; continues with the flags left by row 4.
; Result words 0-4 are parked at [rsp]..[rsp+32] and are copied to r only
; after every load from a and b has been issued.
_text SEGMENT READONLY PARA
sp_384_mul_avx2_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov rax, rdx
        sub rsp, 40
        ; rbx = 0, CF = OF = 0
        xor rbx, rbx
        mov rdx, QWORD PTR [rax]
        ; A[0] * B[0]
        mulx r12, r11, QWORD PTR [r8]
        ; A[0] * B[1]
        mulx r13, r9, QWORD PTR [r8+8]
        adcx r12, r9
        ; A[0] * B[2]
        mulx r14, r9, QWORD PTR [r8+16]
        adcx r13, r9
        ; A[0] * B[3]
        mulx r15, r9, QWORD PTR [r8+24]
        adcx r14, r9
        ; A[0] * B[4]
        mulx rdi, r9, QWORD PTR [r8+32]
        adcx r15, r9
        ; A[0] * B[5]
        mulx rsi, r9, QWORD PTR [r8+40]
        adcx rdi, r9
        adcx rsi, rbx
        ; Result word 0 is final: park it and recycle r11 as the new top word
        mov QWORD PTR [rsp], r11
        mov r11, 0
        adcx r11, rbx
        xor rbx, rbx
        mov rdx, QWORD PTR [rax+8]
        ; A[1] * B[0]
        mulx r10, r9, QWORD PTR [r8]
        adcx r12, r9
        adox r13, r10
        ; A[1] * B[1]
        mulx r10, r9, QWORD PTR [r8+8]
        adcx r13, r9
        adox r14, r10
        ; A[1] * B[2]
        mulx r10, r9, QWORD PTR [r8+16]
        adcx r14, r9
        adox r15, r10
        ; A[1] * B[3]
        mulx r10, r9, QWORD PTR [r8+24]
        adcx r15, r9
        adox rdi, r10
        ; A[1] * B[4]
        mulx r10, r9, QWORD PTR [r8+32]
        adcx rdi, r9
        adox rsi, r10
        ; A[1] * B[5]
        mulx r10, r9, QWORD PTR [r8+40]
        adcx rsi, r9
        adox r11, r10
        adcx r11, rbx
        ; Result word 1 is final: park it and recycle r12 as the new top word
        mov QWORD PTR [rsp+8], r12
        mov r12, 0
        adcx r12, rbx
        adox r12, rbx
        xor rbx, rbx
        mov rdx, QWORD PTR [rax+16]
        ; A[2] * B[0]
        mulx r10, r9, QWORD PTR [r8]
        adcx r13, r9
        adox r14, r10
        ; A[2] * B[1]
        mulx r10, r9, QWORD PTR [r8+8]
        adcx r14, r9
        adox r15, r10
        ; A[2] * B[2]
        mulx r10, r9, QWORD PTR [r8+16]
        adcx r15, r9
        adox rdi, r10
        ; A[2] * B[3]
        mulx r10, r9, QWORD PTR [r8+24]
        adcx rdi, r9
        adox rsi, r10
        ; A[2] * B[4]
        mulx r10, r9, QWORD PTR [r8+32]
        adcx rsi, r9
        adox r11, r10
        ; A[2] * B[5]
        mulx r10, r9, QWORD PTR [r8+40]
        adcx r11, r9
        adox r12, r10
        adcx r12, rbx
        ; Result word 2 is final: park it and recycle r13 as the new top word
        mov QWORD PTR [rsp+16], r13
        mov r13, 0
        adcx r13, rbx
        adox r13, rbx
        xor rbx, rbx
        mov rdx, QWORD PTR [rax+24]
        ; A[3] * B[0]
        mulx r10, r9, QWORD PTR [r8]
        adcx r14, r9
        adox r15, r10
        ; A[3] * B[1]
        mulx r10, r9, QWORD PTR [r8+8]
        adcx r15, r9
        adox rdi, r10
        ; A[3] * B[2]
        mulx r10, r9, QWORD PTR [r8+16]
        adcx rdi, r9
        adox rsi, r10
        ; A[3] * B[3]
        mulx r10, r9, QWORD PTR [r8+24]
        adcx rsi, r9
        adox r11, r10
        ; A[3] * B[4]
        mulx r10, r9, QWORD PTR [r8+32]
        adcx r11, r9
        adox r12, r10
        ; A[3] * B[5]
        mulx r10, r9, QWORD PTR [r8+40]
        adcx r12, r9
        adox r13, r10
        adcx r13, rbx
        ; Result word 3 is final: park it and recycle r14 as the new top word
        mov QWORD PTR [rsp+24], r14
        mov r14, 0
        adcx r14, rbx
        adox r14, rbx
        xor rbx, rbx
        mov rdx, QWORD PTR [rax+32]
        ; A[4] * B[0]
        mulx r10, r9, QWORD PTR [r8]
        adcx r15, r9
        adox rdi, r10
        ; A[4] * B[1]
        mulx r10, r9, QWORD PTR [r8+8]
        adcx rdi, r9
        adox rsi, r10
        ; A[4] * B[2]
        mulx r10, r9, QWORD PTR [r8+16]
        adcx rsi, r9
        adox r11, r10
        ; A[4] * B[3]
        mulx r10, r9, QWORD PTR [r8+24]
        adcx r11, r9
        adox r12, r10
        ; A[4] * B[4]
        mulx r10, r9, QWORD PTR [r8+32]
        adcx r12, r9
        adox r13, r10
        ; A[4] * B[5]
        mulx r10, r9, QWORD PTR [r8+40]
        adcx r13, r9
        adox r14, r10
        adcx r14, rbx
        ; Result word 4 is final: park it. r15 is reloaded by the last mulx
        ; of row 5, and the flags are not reset before that row.
        mov QWORD PTR [rsp+32], r15
        mov rdx, QWORD PTR [rax+40]
        ; A[5] * B[0]
        mulx r10, r9, QWORD PTR [r8]
        adcx rdi, r9
        adox rsi, r10
        ; A[5] * B[1]
        mulx r10, r9, QWORD PTR [r8+8]
        adcx rsi, r9
        adox r11, r10
        ; A[5] * B[2]
        mulx r10, r9, QWORD PTR [r8+16]
        adcx r11, r9
        adox r12, r10
        ; A[5] * B[3]
        mulx r10, r9, QWORD PTR [r8+24]
        adcx r12, r9
        adox r13, r10
        ; A[5] * B[4]
        mulx r10, r9, QWORD PTR [r8+32]
        adcx r13, r9
        adox r14, r10
        ; A[5] * B[5]
        mulx r15, r9, QWORD PTR [r8+40]
        adcx r14, r9
        adox r15, rbx
        adcx r15, rbx
        ; Result words 5..11 go straight from registers to r
        mov QWORD PTR [rcx+40], rdi
        mov QWORD PTR [rcx+48], rsi
        mov QWORD PTR [rcx+56], r11
        mov QWORD PTR [rcx+64], r12
        mov QWORD PTR [rcx+72], r13
        mov QWORD PTR [rcx+80], r14
        mov QWORD PTR [rcx+88], r15
        ; Result words 0..4 are copied from the stack staging area
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        mov r13, QWORD PTR [rsp+16]
        mov r14, QWORD PTR [rsp+24]
        mov r15, QWORD PTR [rsp+32]
        mov QWORD PTR [rcx], r11
        mov QWORD PTR [rcx+8], r12
        mov QWORD PTR [rcx+16], r13
        mov QWORD PTR [rcx+24], r14
        mov QWORD PTR [rcx+32], r15
        add rsp, 40
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mul_avx2_6 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
; Register use in the code below (Win64 argument registers):
;   rcx = r, rdx = a (moved to r8 because mul writes rdx:rax).
; The product is built column by column in a rotating three-register
; accumulator (r9, r10, r11). An off-diagonal product A[i] * A[j] (i != j) is
; added twice; a diagonal product A[i] * A[i] is added once. Column 5 is
; summed in r12:r13:r14 first and doubled once.
; Result words 0-5 are staged at [rsp]..[rsp+40] and copied to r at the end;
; words 6-11 are stored directly (those offsets lie beyond the six words
; read from a).
_text SEGMENT READONLY PARA
sp_384_sqr_6 PROC
        push r12
        push r13
        push r14
        mov r8, rdx
        sub rsp, 48
        ; A[0] * A[0]
        mov rax, QWORD PTR [r8]
        mul rax
        xor r11, r11
        mov QWORD PTR [rsp], rax
        mov r10, rdx
        ; A[0] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+8], r10
        ; A[0] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[1] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rsp+16], r11
        ; A[0] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8+8]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+24], r9
        ; A[0] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[1] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+8]
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[2] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul rax
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+32], r10
        ; A[0] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8]
        xor r10, r10
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[1] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+8]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[2] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; Double the three-product column sum, then fold it into the
        ; running accumulator
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r11, r12
        adc r9, r13
        adc r10, r14
        mov QWORD PTR [rsp+40], r11
        ; A[1] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+8]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+16]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[3] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul rax
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+48], r9
        ; A[2] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+16]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[3] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+24]
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rcx+56], r10
        ; A[3] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+24]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[4] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rcx+64], r11
        ; A[4] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+32]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+72], r9
        ; A[5] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul rax
        add r10, rax
        adc r11, rdx
        mov QWORD PTR [rcx+80], r10
        mov QWORD PTR [rcx+88], r11
        ; Copy the staged low six words into r
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r12, QWORD PTR [rsp+16]
        mov r13, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r12
        mov QWORD PTR [rcx+24], r13
        mov rax, QWORD PTR [rsp+32]
        mov rdx, QWORD PTR [rsp+40]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], rdx
        add rsp, 48
        pop r14
        pop r13
        pop r12
        ret
sp_384_sqr_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  Result of squaring.
;  * a  Number to square in Montgomery form.
;  */
; Register use in the code below (Win64 argument registers):
;   rcx = r, rdx = a (moved to rax because mulx takes its implicit
;   multiplicand from rdx). The r pointer is pushed on the stack so that rcx
;   can serve as a zero register (and later as the carry word).
; Phase 1 accumulates every off-diagonal product A[i] * A[j] (i > j) once
; into r10, r11, r12, r13, r14, r15, rdi, rsi, rbx, rbp (result words 1..10)
; using adcx (CF chain) and adox (OF chain).
; Phase 2 doubles that sum with "adox x, x" while adding the squares
; A[i] * A[i] with adcx, then stores the twelve result words.
_text SEGMENT READONLY PARA
sp_384_sqr_avx2_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        mov rax, rdx
        ; Save r; rcx becomes zero and both carry flags are cleared
        push rcx
        xor rcx, rcx
        mov rdx, QWORD PTR [rax]
        mov rsi, QWORD PTR [rax+8]
        mov rbx, QWORD PTR [rax+16]
        mov rbp, QWORD PTR [rax+24]
        ; Diagonal 0
        ; A[1] * A[0]
        mulx r11, r10, QWORD PTR [rax+8]
        ; A[2] * A[0]
        mulx r12, r8, QWORD PTR [rax+16]
        adcx r11, r8
        ; A[3] * A[0]
        mulx r13, r8, QWORD PTR [rax+24]
        adcx r12, r8
        ; A[4] * A[0]
        mulx r14, r8, QWORD PTR [rax+32]
        adcx r13, r8
        ; A[5] * A[0]
        mulx r15, r8, QWORD PTR [rax+40]
        adcx r14, r8
        adcx r15, rcx
        ; Diagonal 1
        mov rdx, rsi
        ; A[2] * A[1]
        mulx r9, r8, QWORD PTR [rax+16]
        adcx r12, r8
        adox r13, r9
        ; A[3] * A[1]
        mulx r9, r8, QWORD PTR [rax+24]
        adcx r13, r8
        adox r14, r9
        ; A[4] * A[1]
        mulx r9, r8, QWORD PTR [rax+32]
        adcx r14, r8
        adox r15, r9
        ; A[5] * A[1]
        mulx rdi, r8, QWORD PTR [rax+40]
        adcx r15, r8
        adox rdi, rcx
        mov rdx, rbx
        ; A[5] * A[2]
        mulx rsi, r8, QWORD PTR [rax+40]
        adcx rdi, r8
        adox rsi, rcx
        adcx rsi, rcx
        ; rbx (copy of A[2]) is not read again before mulx overwrites it below
        adcx rbx, rcx
        ; Diagonal 2
        ; A[3] * A[2]
        mulx r9, r8, QWORD PTR [rax+24]
        adcx r14, r8
        adox r15, r9
        ; A[4] * A[2]
        mulx r9, r8, QWORD PTR [rax+32]
        adcx r15, r8
        adox rdi, r9
        mov rdx, rbp
        ; A[4] * A[3]
        mulx r9, r8, QWORD PTR [rax+32]
        adcx rdi, r8
        adox rsi, r9
        ; A[5] * A[3]
        mulx rbx, r8, QWORD PTR [rax+40]
        adcx rsi, r8
        adox rbx, rcx
        mov rdx, QWORD PTR [rax+32]
        ; A[5] * A[4]
        mulx rbp, r8, QWORD PTR [rax+40]
        adcx rbx, r8
        adox rbp, rcx
        adcx rbp, rcx
        ; rcx = 0 + 0 + CF: capture the carry out of the CF chain
        adcx rcx, rcx
        ; Doubling previous result as we add in square words results
        ; A[0] * A[0]
        mov rdx, QWORD PTR [rax]
        mulx r9, r8, rdx
        ; Fetch r from the stack just long enough to store result word 0
        ; (pop/push/mov leave the flags untouched)
        pop rdx
        mov QWORD PTR [rdx], r8
        adox r10, r10
        push rdx
        adcx r10, r9
        ; A[1] * A[1]
        mov rdx, QWORD PTR [rax+8]
        mulx r9, r8, rdx
        adox r11, r11
        adcx r11, r8
        adox r12, r12
        adcx r12, r9
        ; A[2] * A[2]
        mov rdx, QWORD PTR [rax+16]
        mulx r9, r8, rdx
        adox r13, r13
        adcx r13, r8
        adox r14, r14
        adcx r14, r9
        ; A[3] * A[3]
        mov rdx, QWORD PTR [rax+24]
        mulx r9, r8, rdx
        adox r15, r15
        adcx r15, r8
        adox rdi, rdi
        adcx rdi, r9
        ; A[4] * A[4]
        mov rdx, QWORD PTR [rax+32]
        mulx r9, r8, rdx
        adox rsi, rsi
        adcx rsi, r8
        adox rbx, rbx
        adcx rbx, r9
        ; A[5] * A[5]
        mov rdx, QWORD PTR [rax+40]
        mulx r9, r8, rdx
        adox rbp, rbp
        adcx rbp, r8
        adcx r9, rcx
        mov r8, 0
        adox r9, r8
        ; Restore r and store result words 1..11 (word 0 was stored above)
        pop rcx
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+32], r13
        mov QWORD PTR [rcx+40], r14
        mov QWORD PTR [rcx+48], r15
        mov QWORD PTR [rcx+56], rdi
        mov QWORD PTR [rcx+64], rsi
        mov QWORD PTR [rcx+72], rbx
        mov QWORD PTR [rcx+80], rbp
        mov QWORD PTR [rcx+88], r9
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_sqr_avx2_6 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = b.
; Returns in rax the carry out of the six-word addition (0 or 1).
; All six words of a are loaded before anything is written to r.
_text SEGMENT READONLY PARA
sp_384_add_6 PROC
        push r12
        push r13
        push r14
        xor rax, rax
        mov r9, QWORD PTR [rdx]
        mov r10, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [rdx+24]
        mov r13, QWORD PTR [rdx+32]
        mov r14, QWORD PTR [rdx+40]
        add r9, QWORD PTR [r8]
        adc r10, QWORD PTR [r8+8]
        adc r11, QWORD PTR [r8+16]
        adc r12, QWORD PTR [r8+24]
        adc r13, QWORD PTR [r8+32]
        adc r14, QWORD PTR [r8+40]
        ; mov does not modify flags, so CF survives the stores
        mov QWORD PTR [rcx], r9
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+32], r13
        mov QWORD PTR [rcx+40], r14
        ; rax = final carry
        adc rax, 0
        pop r14
        pop r13
        pop r12
        ret
sp_384_add_6 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = b.
; Returns in rax a borrow mask: 0 when no borrow left the top word, all ones
; (-1) when it did.
; All six words of a are loaded before anything is written to r.
_text SEGMENT READONLY PARA
sp_384_sub_6 PROC
        push r12
        push r13
        push r14
        xor rax, rax
        mov r9, QWORD PTR [rdx]
        mov r10, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [rdx+24]
        mov r13, QWORD PTR [rdx+32]
        mov r14, QWORD PTR [rdx+40]
        sub r9, QWORD PTR [r8]
        sbb r10, QWORD PTR [r8+8]
        sbb r11, QWORD PTR [r8+16]
        sbb r12, QWORD PTR [r8+24]
        sbb r13, QWORD PTR [r8+32]
        sbb r14, QWORD PTR [r8+40]
        ; mov does not modify flags, so CF survives the stores
        mov QWORD PTR [rcx], r9
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov QWORD PTR [rcx+32], r13
        mov QWORD PTR [rcx+40], r14
        ; rax = 0 - CF: turn the final borrow into a 0 / -1 mask
        sbb rax, rax
        pop r14
        pop r13
        pop r12
        ret
sp_384_sub_6 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
;  * m is -1 to copy and 0 when not.
;  *
;  * r  A single precision number to copy over.
;  * a  A single precision number to copy.
;  * m  Mask value to apply.
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = m.
; Branch-free select: r[i] ^= (r[i] ^ a[i]) & m for i = 0..5, so the same
; loads and stores are executed whatever the value of m.
_text SEGMENT READONLY PARA
sp_384_cond_copy_6 PROC
        push r12
        push r13
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        mov r10, QWORD PTR [rcx+16]
        mov r11, QWORD PTR [rcx+24]
        mov r12, QWORD PTR [rcx+32]
        mov r13, QWORD PTR [rcx+40]
        ; difference bits r ^ a
        xor rax, QWORD PTR [rdx]
        xor r9, QWORD PTR [rdx+8]
        xor r10, QWORD PTR [rdx+16]
        xor r11, QWORD PTR [rdx+24]
        xor r12, QWORD PTR [rdx+32]
        xor r13, QWORD PTR [rdx+40]
        ; keep the difference only when the mask is set
        and rax, r8
        and r9, r8
        and r10, r8
        and r11, r8
        and r12, r8
        and r13, r8
        ; apply: r becomes a when m = -1 and is unchanged when m = 0
        xor QWORD PTR [rcx], rax
        xor QWORD PTR [rcx+8], r9
        xor QWORD PTR [rcx+16], r10
        xor QWORD PTR [rcx+24], r11
        xor QWORD PTR [rcx+32], r12
        xor QWORD PTR [rcx+40], r13
        pop r13
        pop r12
        ret
sp_384_cond_copy_6 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not subtracting.
;  *
;  * r  A single precision number representing condition subtract result.
;  * a  A single precision number to subtract from.
;  * b  A single precision number to subtract.
;  * m  Mask value to apply.
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = b, r9 = m.
; Step 1 writes (b[i] & m) for i = 0..5 to a 48-byte stack buffer; after that
; r8 is reused as a scratch register.
; Step 2 computes r = a - buffer with one sub/sbb chain. Loads and stores are
; interleaved with the chain; mov does not modify flags.
; Returns in rax a borrow mask (0, or -1 when the subtraction borrowed).
; The routine makes no calls and only performs 8-byte stack accesses.
_text SEGMENT READONLY PARA
sp_384_cond_sub_6 PROC
        sub rsp, 48
        ; Stage the masked subtrahend on the stack
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp], r10
        mov QWORD PTR [rsp+8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+16], r10
        mov QWORD PTR [rsp+24], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+32], r10
        mov QWORD PTR [rsp+40], r11
        ; r = a - staged value; r10 and r11 alternate as the word in flight
        mov r10, QWORD PTR [rdx]
        mov r8, QWORD PTR [rsp]
        sub r10, r8
        mov r11, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rsp+8]
        sbb r11, r8
        mov QWORD PTR [rcx], r10
        mov r10, QWORD PTR [rdx+16]
        mov r8, QWORD PTR [rsp+16]
        sbb r10, r8
        mov QWORD PTR [rcx+8], r11
        mov r11, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [rsp+24]
        sbb r11, r8
        mov QWORD PTR [rcx+16], r10
        mov r10, QWORD PTR [rdx+32]
        mov r8, QWORD PTR [rsp+32]
        sbb r10, r8
        mov QWORD PTR [rcx+24], r11
        mov r11, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [rsp+40]
        sbb r11, r8
        mov QWORD PTR [rcx+32], r10
        mov QWORD PTR [rcx+40], r11
        ; rax = 0 - CF: final borrow as a 0 / -1 mask (taken before
        ; "add rsp" changes the flags)
        sbb rax, rax
        add rsp, 48
        ret
sp_384_cond_sub_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  */
; Register use in the code below: rcx = a (12 words, reduced in place; the
; result is written to a[0..5]).
; The m and mp arguments (rdx, r8) are never read: both registers are
; overwritten as scratch before any use, and the reduction is done with
; 32-bit shifts, adds and subtracts instead of multiplications, i.e. it is
; specialised to one fixed modulus.
; Three rounds each process two words of a. r11 collects the carry/borrow
; that leaves the top of the window; after the last round it drives a masked
; subtraction of the modulus.
_text SEGMENT READONLY PARA
sp_384_mont_reduce_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        mov r12, QWORD PTR [rcx]
        mov r13, QWORD PTR [rcx+8]
        mov r14, QWORD PTR [rcx+16]
        mov r15, QWORD PTR [rcx+24]
        mov rdi, QWORD PTR [rcx+32]
        mov rsi, QWORD PTR [rcx+40]
        xor r11, r11
        ; a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
        mov rbx, QWORD PTR [rcx+48]
        mov rbp, QWORD PTR [rcx+56]
        mov rdx, r12
        mov rax, r13
        shld rax, rdx, 32
        shl rdx, 32
        add rdx, r12
        adc rax, r13
        add rax, r12
        mov r8, rdx
        mov r9, rax
        mov r10, rax
        shld r9, r8, 32
        shl r8, 32
        shr r10, 32
        add r12, r8
        adc r13, r9
        adc r14, r10
        adc r15, 0
        adc rdi, 0
        adc rsi, 0
        adc rbx, rdx
        adc rbp, rax
        adc r11, 0
        add r8, rax
        adc r9, rdx
        adc r10, rax
        mov rax, 0
        adc rax, 0
        sub r14, r9
        sbb r15, r10
        sbb rdi, rax
        sbb rsi, 0
        sbb rbx, 0
        sbb rbp, 0
        sbb r11, 0
        ; a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
        mov r12, QWORD PTR [rcx+64]
        mov r13, QWORD PTR [rcx+72]
        mov rdx, r14
        mov rax, r15
        shld rax, rdx, 32
        shl rdx, 32
        add rdx, r14
        adc rax, r15
        add rax, r14
        mov r8, rdx
        mov r9, rax
        mov r10, rax
        shld r9, r8, 32
        shl r8, 32
        shr r10, 32
        ; fold the previous round's carry word into the newly loaded words
        add r12, r11
        adc r13, 0
        mov r11, 0
        adc r11, 0
        add r14, r8
        adc r15, r9
        adc rdi, r10
        adc rsi, 0
        adc rbx, 0
        adc rbp, 0
        adc r12, rdx
        adc r13, rax
        adc r11, 0
        add r8, rax
        adc r9, rdx
        adc r10, rax
        mov rax, 0
        adc rax, 0
        sub rdi, r9
        sbb rsi, r10
        sbb rbx, rax
        sbb rbp, 0
        sbb r12, 0
        sbb r13, 0
        sbb r11, 0
        ; a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
        mov r14, QWORD PTR [rcx+80]
        mov r15, QWORD PTR [rcx+88]
        mov rdx, rdi
        mov rax, rsi
        shld rax, rdx, 32
        shl rdx, 32
        add rdx, rdi
        adc rax, rsi
        add rax, rdi
        mov r8, rdx
        mov r9, rax
        mov r10, rax
        shld r9, r8, 32
        shl r8, 32
        shr r10, 32
        ; fold the previous round's carry word into the newly loaded words
        add r14, r11
        adc r15, 0
        mov r11, 0
        adc r11, 0
        add rdi, r8
        adc rsi, r9
        adc rbx, r10
        adc rbp, 0
        adc r12, 0
        adc r13, 0
        adc r14, rdx
        adc r15, rax
        adc r11, 0
        add r8, rax
        adc r9, rdx
        adc r10, rax
        mov rax, 0
        adc rax, 0
        sub rbx, r9
        sbb rbp, r10
        sbb r12, rax
        sbb r13, 0
        sbb r14, 0
        sbb r15, 0
        sbb r11, 0
        ; Subtract mod if carry
        ; r11 becomes a 0 / -1 mask; r8, r9, r10 and r11 then hold the masked
        ; modulus words 0x00000000FFFFFFFF, 0xFFFFFFFF00000000,
        ; 0xFFFFFFFFFFFFFFFE and (three times) 0xFFFFFFFFFFFFFFFF.
        neg r11
        mov r10, 18446744073709551614
        mov r8d, r11d
        mov r9, r11
        and r10, r11
        shl r9, 32
        sub rbx, r8
        sbb rbp, r9
        sbb r12, r10
        sbb r13, r11
        sbb r14, r11
        sbb r15, r11
        mov QWORD PTR [rcx], rbx
        mov QWORD PTR [rcx+8], rbp
        mov QWORD PTR [rcx+16], r12
        mov QWORD PTR [rcx+24], r13
        mov QWORD PTR [rcx+32], r14
        mov QWORD PTR [rcx+40], r15
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_reduce_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  */
; Register use in the code below: rcx = a, rdx = m (moved to r9 because mul
; writes rdx:rax), r8 = mp.
; Six loop iterations each compute mu = a[i] * mp and add m * mu at word i.
; rcx advances by 8 per iteration, so offsets inside the loop are relative to
; the current word. The two lowest live words are cached in r15 and rdi;
; rsi carries the overflow out of the top word into the next iteration.
; After the loop rcx points at a[6]; sp_384_cond_sub_6 then writes
; a[6..11] - (m and mask) to a[0..5], with mask = -(final overflow).
; rax is left as returned by sp_384_cond_sub_6.
;
; The argument set-up for that call used to be wrapped in
; "IFDEF _WIN64 / ELSE / ENDIF". This file defines _WIN64 itself when it is
; missing, so the ELSE arm could never be assembled, and it was also wrong
; (it replaced the modulus pointer with the mask). Only the Win64 sequence is
; kept, and the no-op "mov rcx, rcx" is dropped.
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        mov r9, rdx
        xor rsi, rsi
        ; i = 6
        mov r10, 6
        mov r15, QWORD PTR [rcx]
        mov rdi, QWORD PTR [rcx+8]
L_384_mont_reduce_order_6_loop:
        ; mu = a[i] * mp
        mov r13, r15
        imul r13, r8
        ; a[i+0] += m[0] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9]
        add r15, rax
        adc r12, rdx
        ; a[i+1] += m[1] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+8]
        mov r15, rdi
        add r15, rax
        adc r11, rdx
        add r15, r12
        adc r11, 0
        ; a[i+2] += m[2] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+16]
        mov rdi, QWORD PTR [rcx+16]
        add rdi, rax
        adc r12, rdx
        add rdi, r11
        adc r12, 0
        ; a[i+3] += m[3] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+24]
        mov r14, QWORD PTR [rcx+24]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+24], r14
        adc r11, 0
        ; a[i+4] += m[4] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+32]
        mov r14, QWORD PTR [rcx+32]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+32], r14
        adc r12, 0
        ; a[i+5] += m[5] * mu
        mov rax, r13
        mul QWORD PTR [r9+40]
        mov r14, QWORD PTR [rcx+40]
        add r12, rax
        ; add the overflow carried in from the previous iteration
        adc rdx, rsi
        mov rsi, 0
        adc rsi, 0
        add r14, r12
        mov QWORD PTR [rcx+40], r14
        adc QWORD PTR [rcx+48], rdx
        adc rsi, 0
        ; i -= 1
        add rcx, 8
        dec r10
        jnz L_384_mont_reduce_order_6_loop
        ; Write back the two words that were cached in registers
        mov QWORD PTR [rcx], r15
        mov QWORD PTR [rcx+8], rdi
        ; rsi: final overflow 0 / 1 turned into a mask 0 / -1
        neg rsi
        ; sp_384_cond_sub_6(r = a, a = a + 6 words, b = m, m = mask)
        mov r8, r9
        mov r9, rsi
        mov rdx, rcx
        sub rcx, 48
        ; No shadow space is reserved and rsp is not re-aligned for this
        ; call; sp_384_cond_sub_6 in this file makes no calls and only does
        ; 8-byte stack accesses below its own rsp.
        call sp_384_cond_sub_6
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_reduce_order_6 ENDP
_text ENDS
; /* Compare a with b in constant time.
;  *
;  * a  A single precision integer.
;  * b  A single precision integer.
;  * return  -ve, 0 or +ve if a is less than, equal to or greater than b
;  *         respectively.
;  */
; Register use in the code below: rcx = a, rdx = b.
; Words are compared from most significant (offset 40) to least significant.
; r8 is an "all words equal so far" mask (-1, or 0 once a difference has been
; seen); both operands are ANDed with it, so every word after the first
; difference compares as 0 against 0 and cannot change rax.
; rax starts at -1, becomes 1 on "above" (cmova) and r8 (= -1 at that point)
; on "below" (cmovc). The closing "xor rax, r8" turns the untouched -1 into 0
; when all words matched and leaves 1 / -1 unchanged otherwise.
_text SEGMENT READONLY PARA
sp_384_cmp_6 PROC
        push r12
        xor r9, r9
        mov r8, -1
        mov rax, -1
        mov r10, 1
        ; word 5
        mov r11, QWORD PTR [rcx+40]
        mov r12, QWORD PTR [rdx+40]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        ; word 4
        mov r11, QWORD PTR [rcx+32]
        mov r12, QWORD PTR [rdx+32]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        ; word 3
        mov r11, QWORD PTR [rcx+24]
        mov r12, QWORD PTR [rdx+24]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        ; word 2
        mov r11, QWORD PTR [rcx+16]
        mov r12, QWORD PTR [rdx+16]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        ; word 1
        mov r11, QWORD PTR [rcx+8]
        mov r12, QWORD PTR [rdx+8]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        ; word 0
        mov r11, QWORD PTR [rcx]
        mov r12, QWORD PTR [rdx]
        and r11, r8
        and r12, r8
        sub r11, r12
        cmova rax, r10
        cmovc rax, r8
        cmovnz r8, r9
        xor rax, r8
        pop r12
        ret
sp_384_cmp_6 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
;  *
;  * r  Result of addition.
;  * a  First number to add in Montgomery form.
;  * b  Second number to add in Montgomery form.
;  * m  Modulus (prime).
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = b.
; The m argument (r9) is never read: r9 is overwritten with a[1] first. The
; modulus is hard-coded as the words 0x00000000FFFFFFFF,
; 0xFFFFFFFF00000000 (18446744069414584320),
; 0xFFFFFFFFFFFFFFFE (18446744073709551614) and three words of all ones,
; each ANDed with a 0 / -1 mask before use.
; The sum is followed by two masked subtractions of the modulus: the first
; is enabled by the carry out of the addition, the second by what remains of
; that mask after the borrow of the first subtraction is added back.
_text SEGMENT READONLY PARA
sp_384_mont_add_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        mov r13, QWORD PTR [rdx+40]
        ; a + b; the constant loads are interleaved (mov keeps the flags)
        add rax, QWORD PTR [r8]
        adc r9, QWORD PTR [r8+8]
        mov r15, 18446744069414584320
        adc r10, QWORD PTR [r8+16]
        mov rdi, 18446744073709551614
        adc r11, QWORD PTR [r8+24]
        adc r12, QWORD PTR [r8+32]
        adc r13, QWORD PTR [r8+40]
        ; rdx = 0 - CF: -1 when the addition carried out, else 0
        sbb rdx, rdx
        ; r14 = low 32 bits of the mask, zero-extended (masked modulus word 0)
        mov r14d, edx
        and r15, rdx
        and rdi, rdx
        sub rax, r14
        sbb r9, r15
        sbb r10, rdi
        sbb r11, rdx
        sbb r12, rdx
        sbb r13, rdx
        ; mask + borrow: a set mask stays -1 only if the subtraction above
        ; did not borrow
        adc rdx, 0
        and r14, rdx
        and r15, rdx
        and rdi, rdx
        ; second masked subtraction, interleaved with the stores to r
        sub rax, r14
        sbb r9, r15
        mov QWORD PTR [rcx], rax
        sbb r10, rdi
        mov QWORD PTR [rcx+8], r9
        sbb r11, rdx
        mov QWORD PTR [rcx+16], r10
        sbb r12, rdx
        mov QWORD PTR [rcx+24], r11
        sbb r13, rdx
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r13
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_add_6 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
;  *
;  * r  Result of doubling.
;  * a  Number to double in Montgomery form.
;  * m  Modulus (prime).
;  */
; Register use in the code below: rcx = r, rdx = a.
; The m argument (r8) is never read: r8 is overwritten with a[1] first. The
; modulus is hard-coded as the words 0x00000000FFFFFFFF,
; 0xFFFFFFFF00000000 (18446744069414584320),
; 0xFFFFFFFFFFFFFFFE (18446744073709551614) and three words of all ones,
; each ANDed with a 0 / -1 mask before use.
; The mask for the first subtraction is the sign-extended top bit of a[5],
; which is the bit shifted out of the 384-bit value by the doubling.
_text SEGMENT READONLY PARA
sp_384_mont_dbl_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        mov rax, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        mov r11, QWORD PTR [rdx+32]
        mov r12, QWORD PTR [rdx+40]
        ; a + a; the constant loads are interleaved (mov keeps the flags)
        add rax, rax
        adc r8, r8
        mov r14, 18446744069414584320
        adc r9, r9
        mov r15, 18446744073709551614
        adc r10, r10
        adc r11, r11
        ; keep a copy of the original top word for the mask
        mov rdi, r12
        adc r12, r12
        ; rdi = -1 if the top bit of a[5] was set, else 0
        sar rdi, 63
        ; r13 = low 32 bits of the mask, zero-extended (masked modulus word 0)
        mov r13d, edi
        and r14, rdi
        and r15, rdi
        sub rax, r13
        sbb r8, r14
        sbb r9, r15
        sbb r10, rdi
        sbb r11, rdi
        sbb r12, rdi
        ; mask + borrow: a set mask stays -1 only if the subtraction above
        ; did not borrow
        adc rdi, 0
        and r13, rdi
        and r14, rdi
        and r15, rdi
        ; second masked subtraction, interleaved with the stores to r
        sub rax, r13
        sbb r8, r14
        mov QWORD PTR [rcx], rax
        sbb r9, r15
        mov QWORD PTR [rcx+8], r8
        sbb r10, rdi
        mov QWORD PTR [rcx+16], r9
        sbb r11, rdi
        mov QWORD PTR [rcx+24], r10
        sbb r12, rdi
        mov QWORD PTR [rcx+32], r11
        mov QWORD PTR [rcx+40], r12
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_dbl_6 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
;  *
;  * r  Result of tripling.
;  * a  Number to triple in Montgomery form.
;  * m  Modulus (prime).
;  */
; Computes r = 3 * a mod m in two steps: a is doubled and reduced, then a is
; added once more and the sum is reduced again.
; Register use in the code below: rcx = r, rdx = a.
; The m argument (r8) is never read: r8 is overwritten with a[1] first. The
; modulus is hard-coded as the words 0x00000000FFFFFFFF,
; 0xFFFFFFFF00000000 (18446744069414584320),
; 0xFFFFFFFFFFFFFFFE (18446744073709551614) and three words of all ones,
; each ANDed with a 0 / -1 mask before use.
; Nothing is written to r until the last load from a has been issued, so the
; routine also works in place (r == a). Earlier code stored the intermediate
; low word to r[0] before a[0] was read for the second addition, which gave
; a wrong result for in-place use; that store has been removed (the final
; store sequence writes r[0] in any case).
_text SEGMENT READONLY PARA
sp_384_mont_tpl_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        mov rax, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        mov r11, QWORD PTR [rdx+32]
        mov r12, QWORD PTR [rdx+40]
        ; Step 1: a + a; constant loads are interleaved (mov keeps the flags)
        add rax, rax
        adc r8, r8
        mov r14, 18446744069414584320
        adc r9, r9
        mov r15, 18446744073709551614
        adc r10, r10
        adc r11, r11
        adc r12, r12
        ; rdi = 0 - CF: -1 when the doubling carried out, else 0
        sbb rdi, rdi
        ; r13 = low 32 bits of the mask, zero-extended (masked modulus word 0)
        mov r13d, edi
        and r14, rdi
        and r15, rdi
        sub rax, r13
        sbb r8, r14
        sbb r9, r15
        sbb r10, rdi
        sbb r11, rdi
        sbb r12, rdi
        ; mask + borrow: a set mask stays -1 only if the subtraction above
        ; did not borrow
        adc rdi, 0
        and r13, rdi
        and r14, rdi
        and r15, rdi
        sub rax, r13
        sbb r8, r14
        sbb r9, r15
        sbb r10, rdi
        sbb r11, rdi
        sbb r12, rdi
        ; Step 2: add a once more; the constants are reloaded because the
        ; copies in r14 / r15 were masked above
        add rax, QWORD PTR [rdx]
        adc r8, QWORD PTR [rdx+8]
        mov r14, 18446744069414584320
        adc r9, QWORD PTR [rdx+16]
        mov r15, 18446744073709551614
        adc r10, QWORD PTR [rdx+24]
        adc r11, QWORD PTR [rdx+32]
        adc r12, QWORD PTR [rdx+40]
        sbb rdi, rdi
        mov r13d, edi
        and r14, rdi
        and r15, rdi
        sub rax, r13
        sbb r8, r14
        sbb r9, r15
        sbb r10, rdi
        sbb r11, rdi
        sbb r12, rdi
        adc rdi, 0
        and r13, rdi
        and r14, rdi
        and r15, rdi
        ; last masked subtraction, interleaved with the stores to r
        sub rax, r13
        sbb r8, r14
        mov QWORD PTR [rcx], rax
        sbb r9, r15
        mov QWORD PTR [rcx+8], r8
        sbb r10, rdi
        mov QWORD PTR [rcx+16], r9
        sbb r11, rdi
        mov QWORD PTR [rcx+24], r10
        sbb r12, rdi
        mov QWORD PTR [rcx+32], r11
        mov QWORD PTR [rcx+40], r12
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_tpl_6 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
;  *
;  * r  Result of subtraction.
;  * a  Number to subtract from in Montgomery form.
;  * b  Number to subtract with in Montgomery form.
;  * m  Modulus (prime).
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = b.
; The m argument (r9) is never read: r9 is overwritten with a[1] first. The
; modulus is hard-coded as the words 0x00000000FFFFFFFF,
; 0xFFFFFFFF00000000 (18446744069414584320),
; 0xFFFFFFFFFFFFFFFE (18446744073709551614) and three words of all ones,
; each ANDed with a 0 / -1 mask before use.
; The difference is followed by two masked additions of the modulus: the
; first is enabled by the borrow out of the subtraction, the second by what
; remains of that mask after the carry of the first addition is added back.
_text SEGMENT READONLY PARA
sp_384_mont_sub_6 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        mov r13, QWORD PTR [rdx+40]
        ; a - b; the constant loads are interleaved (mov keeps the flags)
        sub rax, QWORD PTR [r8]
        sbb r9, QWORD PTR [r8+8]
        mov r15, 18446744069414584320
        sbb r10, QWORD PTR [r8+16]
        mov rdi, 18446744073709551614
        sbb r11, QWORD PTR [r8+24]
        sbb r12, QWORD PTR [r8+32]
        sbb r13, QWORD PTR [r8+40]
        ; rdx = 0 - CF: -1 when the subtraction borrowed, else 0
        sbb rdx, rdx
        ; r14 = low 32 bits of the mask, zero-extended (masked modulus word 0)
        mov r14d, edx
        and r15, rdx
        and rdi, rdx
        add rax, r14
        adc r9, r15
        adc r10, rdi
        adc r11, rdx
        adc r12, rdx
        adc r13, rdx
        ; mask + carry: a set mask stays -1 only if the addition above did
        ; not carry out
        adc rdx, 0
        and r14, rdx
        and r15, rdx
        and rdi, rdx
        ; second masked addition, interleaved with the stores to r
        add rax, r14
        adc r9, r15
        mov QWORD PTR [rcx], rax
        adc r10, rdi
        mov QWORD PTR [rcx+8], r9
        adc r11, rdx
        mov QWORD PTR [rcx+16], r10
        adc r12, rdx
        mov QWORD PTR [rcx+24], r11
        adc r13, rdx
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r13
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_mont_sub_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
;  *
;  * r  Result of division by 2.
;  * a  Number to divide.
;  * m  Modulus (prime).
;  */
; Register use in the code below: rcx = r, rdx = a, r8 = m (read here, unlike
; the add/sub/dbl routines around it).
; r13 = -(a[0] and 1) is a mask that is all ones when a is odd. The masked
; modulus is written to a 48-byte stack buffer, a is added to it in place
; (the carry out goes to r12), and the 385-bit sum is shifted right by one
; bit with a shrd chain while being stored to r.
_text SEGMENT READONLY PARA
sp_384_mont_div2_6 PROC
        push r12
        push r13
        sub rsp, 48
        mov r13, QWORD PTR [rdx]
        xor r12, r12
        ; rax keeps a[0] for the addition below
        mov rax, r13
        ; r13 = 0 when a is even, -1 when a is odd
        and r13, 1
        neg r13
        ; buffer = m and mask
        mov r10, QWORD PTR [r8]
        and r10, r13
        mov QWORD PTR [rsp], r10
        mov r10, QWORD PTR [r8+8]
        and r10, r13
        mov QWORD PTR [rsp+8], r10
        mov r10, QWORD PTR [r8+16]
        and r10, r13
        mov QWORD PTR [rsp+16], r10
        mov r10, QWORD PTR [r8+24]
        and r10, r13
        mov QWORD PTR [rsp+24], r10
        mov r10, QWORD PTR [r8+32]
        and r10, r13
        mov QWORD PTR [rsp+32], r10
        mov r10, QWORD PTR [r8+40]
        and r10, r13
        mov QWORD PTR [rsp+40], r10
        ; buffer += a (mov keeps the carry flag between the adc steps)
        add QWORD PTR [rsp], rax
        mov rax, QWORD PTR [rdx+8]
        adc QWORD PTR [rsp+8], rax
        mov rax, QWORD PTR [rdx+16]
        adc QWORD PTR [rsp+16], rax
        mov rax, QWORD PTR [rdx+24]
        adc QWORD PTR [rsp+24], rax
        mov rax, QWORD PTR [rdx+32]
        adc QWORD PTR [rsp+32], rax
        mov rax, QWORD PTR [rdx+40]
        adc QWORD PTR [rsp+40], rax
        ; r12 = carry out of the 384-bit addition
        adc r12, 0
        ; Shift right by one: each word takes its new top bit from the next
        ; higher word; the top word takes it from the carry in r12
        mov rax, QWORD PTR [rsp]
        mov r9, QWORD PTR [rsp+8]
        shrd rax, r9, 1
        mov QWORD PTR [rcx], rax
        mov rax, QWORD PTR [rsp+16]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+8], r9
        mov r9, QWORD PTR [rsp+24]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+16], rax
        mov rax, QWORD PTR [rsp+32]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+24], r9
        mov r9, QWORD PTR [rsp+40]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+32], rax
        shrd r9, r12, 1
        mov QWORD PTR [rcx+40], r9
        add rsp, 48
        pop r13
        pop r12
        ret
sp_384_mont_div2_6 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of point to retrieve.
;  */
; Register use in the code below: rcx = r, rdx = table, r8d = idx.
; Table entries are 296 bytes apart. The pointer is advanced by one entry
; before the first load and the loop runs 32 times, so entries 1..32 are all
; read whatever idx is. A counter vector (starting at 1, step 1) is compared
; with idx broadcast to every lane; the resulting all-ones / all-zero mask
; selects the matching entry, which is ORed into the accumulators.
; Three 48-byte fields are copied, at byte offsets 0, 96 and 192 of the entry
; and of r. The first pass handles offsets 0 and 96, the second pass rewinds
; the table pointer by 32 * 296 = 9472 bytes and handles offset 192.
; xmm6-xmm15 are saved to and restored from a 160-byte stack area.
_text SEGMENT READONLY PARA
sp_384_get_point_33_6 PROC
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15
        mov rax, 1
        movd xmm13, r8d
        add rdx, 296
        movd xmm15, eax
        mov rax, 32
        ; xmm15 = 1 in every lane, xmm13 = idx in every lane
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        pxor xmm4, xmm4
        pxor xmm5, xmm5
        ; xmm14 = running entry number, starting at 1
        movdqa xmm14, xmm15
L_384_get_point_33_6_start_1:
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        ; xmm12 = all ones if this entry number equals idx, else zero
        pcmpeqd xmm12, xmm13
        movdqu xmm6, OWORD PTR [rdx]
        movdqu xmm7, OWORD PTR [rdx+16]
        movdqu xmm8, OWORD PTR [rdx+32]
        movdqu xmm9, OWORD PTR [rdx+96]
        movdqu xmm10, OWORD PTR [rdx+112]
        movdqu xmm11, OWORD PTR [rdx+128]
        add rdx, 296
        pand xmm6, xmm12
        pand xmm7, xmm12
        pand xmm8, xmm12
        pand xmm9, xmm12
        pand xmm10, xmm12
        pand xmm11, xmm12
        por xmm0, xmm6
        por xmm1, xmm7
        por xmm2, xmm8
        por xmm3, xmm9
        por xmm4, xmm10
        por xmm5, xmm11
        dec rax
        jnz L_384_get_point_33_6_start_1
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+32], xmm2
        movdqu OWORD PTR [rcx+96], xmm3
        movdqu OWORD PTR [rcx+112], xmm4
        movdqu OWORD PTR [rcx+128], xmm5
        ; Second pass over the same 32 entries for the field at offset 192
        mov rax, 1
        movd xmm13, r8d
        sub rdx, 9472
        movd xmm15, eax
        mov rax, 32
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        movdqa xmm14, xmm15
L_384_get_point_33_6_start_2:
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu xmm6, OWORD PTR [rdx+192]
        movdqu xmm7, OWORD PTR [rdx+208]
        movdqu xmm8, OWORD PTR [rdx+224]
        add rdx, 296
        pand xmm6, xmm12
        pand xmm7, xmm12
        pand xmm8, xmm12
        por xmm0, xmm6
        por xmm1, xmm7
        por xmm2, xmm8
        dec rax
        jnz L_384_get_point_33_6_start_2
        movdqu OWORD PTR [rcx+192], xmm0
        movdqu OWORD PTR [rcx+208], xmm1
        movdqu OWORD PTR [rcx+224], xmm2
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        ret
sp_384_get_point_33_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of point to retrieve.
;  */
; Register use in the code below: rcx = r, rdx = table, r8d = idx.
; Same selection scheme as the SSE2 routine, done in a single pass: each
; 48-byte field (entry offsets 0, 96, 192) is handled as one 32-byte ymm
; access plus one 16-byte xmm access. Entries are 296 bytes apart and
; entries 1..32 are all read whatever idx is.
; vpermd with an all-zero index vector (ymm14) broadcasts lane 0.
; xmm6-xmm15 are saved to and restored from a 160-byte stack area.
; vzeroupper is executed before returning because ymm0, ymm2 and ymm4 are
; written with 256-bit operations here, and leaving their upper halves dirty
; can slow down non-VEX SSE code that runs afterwards. It only clears bits
; 128 and above of the ymm registers, so the restored xmm6-xmm15 values are
; not affected.
_text SEGMENT READONLY PARA
sp_384_get_point_33_avx2_6 PROC
        sub rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        mov rax, 1
        movd xmm13, r8d
        add rdx, 296
        movd xmm15, eax
        mov rax, 32
        vpxor ymm14, ymm14, ymm14
        ; ymm13 = idx in every lane, ymm15 = 1 in every lane
        vpermd ymm13, ymm14, ymm13
        vpermd ymm15, ymm14, ymm15
        vpxor ymm0, ymm0, ymm0
        vpxor xmm1, xmm1, xmm1
        vpxor ymm2, ymm2, ymm2
        vpxor xmm3, xmm3, xmm3
        vpxor ymm4, ymm4, ymm4
        vpxor xmm5, xmm5, xmm5
        ; ymm14 = running entry number, starting at 1
        vmovdqa ymm14, ymm15
L_384_get_point_33_avx2_6_start:
        ; ymm12 = all ones if this entry number equals idx, else zero
        vpcmpeqd ymm12, ymm14, ymm13
        vpaddd ymm14, ymm14, ymm15
        vmovupd ymm6, YMMWORD PTR [rdx]
        vmovdqu xmm7, OWORD PTR [rdx+32]
        vmovupd ymm8, YMMWORD PTR [rdx+96]
        vmovdqu xmm9, OWORD PTR [rdx+128]
        vmovupd ymm10, YMMWORD PTR [rdx+192]
        vmovdqu xmm11, OWORD PTR [rdx+224]
        add rdx, 296
        vpand ymm6, ymm6, ymm12
        vpand xmm7, xmm7, xmm12
        vpand ymm8, ymm8, ymm12
        vpand xmm9, xmm9, xmm12
        vpand ymm10, ymm10, ymm12
        vpand xmm11, xmm11, xmm12
        vpor ymm0, ymm0, ymm6
        vpor xmm1, xmm1, xmm7
        vpor ymm2, ymm2, ymm8
        vpor xmm3, xmm3, xmm9
        vpor ymm4, ymm4, ymm10
        vpor xmm5, xmm5, xmm11
        dec rax
        jnz L_384_get_point_33_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovupd YMMWORD PTR [rcx+192], ymm4
        vmovdqu OWORD PTR [rcx+224], xmm5
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        ; leave the upper ymm state clean for the caller
        vzeroupper
        add rsp, 160
        ret
sp_384_get_point_33_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 384 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_avx2_6 PROC
        push r12
        push r13
        push r14
        push r15
        mov rax, rdx
        xor r15, r15
        mov r14, QWORD PTR [rcx]
        xor r13, r13
L_mont_loop_order_avx2_6:
        ; mu = a[i] * mp
        mov rdx, r14
        mov r11, r14
        imul rdx, r8
        xor r13, r13
        ; a[i+0] += m[0] * mu
        mulx r10, r9, QWORD PTR [rax]
        mov r14, QWORD PTR [rcx+8]
        adcx r11, r9
        adox r14, r10
        ; a[i+1] += m[1] * mu
        mulx r10, r9, QWORD PTR [rax+8]
        mov r11, QWORD PTR [rcx+16]
        adcx r14, r9
        adox r11, r10
        ; a[i+2] += m[2] * mu
        mulx r10, r9, QWORD PTR [rax+16]
        mov r12, QWORD PTR [rcx+24]
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+16], r11
        ; a[i+3] += m[3] * mu
        mulx r10, r9, QWORD PTR [rax+24]
        mov r11, QWORD PTR [rcx+32]
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+24], r12
        ; a[i+4] += m[4] * mu
        mulx r10, r9, QWORD PTR [rax+32]
        mov r12, QWORD PTR [rcx+40]
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+32], r11
        ; a[i+5] += m[5] * mu
        mulx r10, r9, QWORD PTR [rax+40]
        mov r11, QWORD PTR [rcx+48]
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+40], r12
        adcx r11, r15
        mov QWORD PTR [rcx+48], r11
        mov r15, r13
        adox r15, r13
        adcx r15, r13
        ; mu = a[i] * mp
        mov rdx, r14
        mov r11, r14
        imul
rdx, r8 xor r13, r13 ; a[i+0] += m[0] * mu mulx r10, r9, QWORD PTR [rax] mov r14, QWORD PTR [rcx+16] adcx r11, r9 adox r14, r10 ; a[i+1] += m[1] * mu mulx r10, r9, QWORD PTR [rax+8] mov r11, QWORD PTR [rcx+24] adcx r14, r9 adox r11, r10 ; a[i+2] += m[2] * mu mulx r10, r9, QWORD PTR [rax+16] mov r12, QWORD PTR [rcx+32] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+24], r11 ; a[i+3] += m[3] * mu mulx r10, r9, QWORD PTR [rax+24] mov r11, QWORD PTR [rcx+40] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+32], r12 ; a[i+4] += m[4] * mu mulx r10, r9, QWORD PTR [rax+32] mov r12, QWORD PTR [rcx+48] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+40], r11 ; a[i+5] += m[5] * mu mulx r10, r9, QWORD PTR [rax+40] mov r11, QWORD PTR [rcx+56] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+48], r12 adcx r11, r15 mov QWORD PTR [rcx+56], r11 mov r15, r13 adox r15, r13 adcx r15, r13 ; mu = a[i] * mp mov rdx, r14 mov r11, r14 imul rdx, r8 xor r13, r13 ; a[i+0] += m[0] * mu mulx r10, r9, QWORD PTR [rax] mov r14, QWORD PTR [rcx+24] adcx r11, r9 adox r14, r10 ; a[i+1] += m[1] * mu mulx r10, r9, QWORD PTR [rax+8] mov r11, QWORD PTR [rcx+32] adcx r14, r9 adox r11, r10 ; a[i+2] += m[2] * mu mulx r10, r9, QWORD PTR [rax+16] mov r12, QWORD PTR [rcx+40] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+32], r11 ; a[i+3] += m[3] * mu mulx r10, r9, QWORD PTR [rax+24] mov r11, QWORD PTR [rcx+48] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+40], r12 ; a[i+4] += m[4] * mu mulx r10, r9, QWORD PTR [rax+32] mov r12, QWORD PTR [rcx+56] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+48], r11 ; a[i+5] += m[5] * mu mulx r10, r9, QWORD PTR [rax+40] mov r11, QWORD PTR [rcx+64] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+56], r12 adcx r11, r15 mov QWORD PTR [rcx+64], r11 mov r15, r13 adox r15, r13 adcx r15, r13 ; mu = a[i] * mp mov rdx, r14 mov r11, r14 imul rdx, r8 xor r13, r13 ; a[i+0] += m[0] * mu mulx r10, r9, QWORD PTR [rax] mov r14, QWORD PTR [rcx+32] adcx r11, r9 adox r14, r10 ; a[i+1] += m[1] * mu mulx r10, 
r9, QWORD PTR [rax+8] mov r11, QWORD PTR [rcx+40] adcx r14, r9 adox r11, r10 ; a[i+2] += m[2] * mu mulx r10, r9, QWORD PTR [rax+16] mov r12, QWORD PTR [rcx+48] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+40], r11 ; a[i+3] += m[3] * mu mulx r10, r9, QWORD PTR [rax+24] mov r11, QWORD PTR [rcx+56] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+48], r12 ; a[i+4] += m[4] * mu mulx r10, r9, QWORD PTR [rax+32] mov r12, QWORD PTR [rcx+64] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+56], r11 ; a[i+5] += m[5] * mu mulx r10, r9, QWORD PTR [rax+40] mov r11, QWORD PTR [rcx+72] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+64], r12 adcx r11, r15 mov QWORD PTR [rcx+72], r11 mov r15, r13 adox r15, r13 adcx r15, r13 ; mu = a[i] * mp mov rdx, r14 mov r11, r14 imul rdx, r8 xor r13, r13 ; a[i+0] += m[0] * mu mulx r10, r9, QWORD PTR [rax] mov r14, QWORD PTR [rcx+40] adcx r11, r9 adox r14, r10 ; a[i+1] += m[1] * mu mulx r10, r9, QWORD PTR [rax+8] mov r11, QWORD PTR [rcx+48] adcx r14, r9 adox r11, r10 ; a[i+2] += m[2] * mu mulx r10, r9, QWORD PTR [rax+16] mov r12, QWORD PTR [rcx+56] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+48], r11 ; a[i+3] += m[3] * mu mulx r10, r9, QWORD PTR [rax+24] mov r11, QWORD PTR [rcx+64] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+56], r12 ; a[i+4] += m[4] * mu mulx r10, r9, QWORD PTR [rax+32] mov r12, QWORD PTR [rcx+72] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+64], r11 ; a[i+5] += m[5] * mu mulx r10, r9, QWORD PTR [rax+40] mov r11, QWORD PTR [rcx+80] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+72], r12 adcx r11, r15 mov QWORD PTR [rcx+80], r11 mov r15, r13 adox r15, r13 adcx r15, r13 ; mu = a[i] * mp mov rdx, r14 mov r11, r14 imul rdx, r8 xor r13, r13 ; a[i+0] += m[0] * mu mulx r10, r9, QWORD PTR [rax] mov r14, QWORD PTR [rcx+48] adcx r11, r9 adox r14, r10 ; a[i+1] += m[1] * mu mulx r10, r9, QWORD PTR [rax+8] mov r11, QWORD PTR [rcx+56] adcx r14, r9 adox r11, r10 ; a[i+2] += m[2] * mu mulx r10, r9, QWORD PTR [rax+16] mov r12, QWORD PTR [rcx+64] 
adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+56], r11 ; a[i+3] += m[3] * mu mulx r10, r9, QWORD PTR [rax+24] mov r11, QWORD PTR [rcx+72] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+64], r12 ; a[i+4] += m[4] * mu mulx r10, r9, QWORD PTR [rax+32] mov r12, QWORD PTR [rcx+80] adcx r11, r9 adox r12, r10 mov QWORD PTR [rcx+72], r11 ; a[i+5] += m[5] * mu mulx r10, r9, QWORD PTR [rax+40] mov r11, QWORD PTR [rcx+88] adcx r12, r9 adox r11, r10 mov QWORD PTR [rcx+80], r12 adcx r11, r15 mov QWORD PTR [rcx+88], r11 mov r15, r13 adox r15, r13 adcx r15, r13 neg r15 mov r8, rcx add rcx, 48 mov r10, QWORD PTR [rax] mov rdx, r14 pext r10, r10, r15 sub rdx, r10 mov r10, QWORD PTR [rax+8] mov r9, QWORD PTR [rcx+8] pext r10, r10, r15 mov QWORD PTR [r8], rdx sbb r9, r10 mov rdx, QWORD PTR [rax+16] mov r10, QWORD PTR [rcx+16] pext rdx, rdx, r15 mov QWORD PTR [r8+8], r9 sbb r10, rdx mov r9, QWORD PTR [rax+24] mov rdx, QWORD PTR [rcx+24] pext r9, r9, r15 mov QWORD PTR [r8+16], r10 sbb rdx, r9 mov r10, QWORD PTR [rax+32] mov r9, QWORD PTR [rcx+32] pext r10, r10, r15 mov QWORD PTR [r8+24], rdx sbb r9, r10 mov rdx, QWORD PTR [rax+40] mov r10, QWORD PTR [rcx+40] pext rdx, rdx, r15 mov QWORD PTR [r8+32], r9 sbb r10, rdx mov QWORD PTR [r8+40], r10 pop r15 pop r14 pop r13 pop r12 ret sp_384_mont_reduce_order_avx2_6 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * ; * r A single precision number representing condition subtract result. ; * a A single precision number to subtract from. ; * b A single precision number to subtract. ; * m Mask value to apply. 
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_avx2_6 PROC
        ; rcx = r, rdx = a, r8 = b, r9 = m.
        ; PEXT with an all-ones mask returns its source unchanged and with a
        ; zero mask returns 0, so each word of b is either kept or zeroed.
        ; m must therefore be exactly 0 or -1.
        ; Returns rax = 0 when there was no final borrow, -1 when there was.
        push r12
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx]
        pext r12, r12, r9
        sub r10, r12
        mov r12, QWORD PTR [r8+8]
        mov r11, QWORD PTR [rdx+8]
        pext r12, r12, r9
        mov QWORD PTR [rcx], r10
        sbb r11, r12
        mov r10, QWORD PTR [r8+16]
        mov r12, QWORD PTR [rdx+16]
        pext r10, r10, r9
        mov QWORD PTR [rcx+8], r11
        sbb r12, r10
        mov r11, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+24]
        pext r11, r11, r9
        mov QWORD PTR [rcx+16], r12
        sbb r10, r11
        mov r12, QWORD PTR [r8+32]
        mov r11, QWORD PTR [rdx+32]
        pext r12, r12, r9
        mov QWORD PTR [rcx+24], r10
        sbb r11, r12
        mov r10, QWORD PTR [r8+40]
        mov r12, QWORD PTR [rdx+40]
        pext r10, r10, r9
        mov QWORD PTR [rcx+32], r11
        sbb r12, r10
        mov QWORD PTR [rcx+40], r12
        ; Turn the final borrow into a 0 / -1 return value.
        sbb rax, rax
        pop r12
        ret
sp_384_cond_sub_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd, m is added first so that the value is even; the carry out
; * of that addition is shifted back in as the top bit.
; *
; * r  Result of division by 2. (rcx)
; * a  Number to divide. (rdx)
; * m  Modulus (prime). (r8)
; */
_text SEGMENT READONLY PARA
sp_384_mont_div2_avx2_6 PROC
        push r12
        push r13
        ; r13 = all ones when a is odd, zero when even; r12 collects the carry.
        mov r13, QWORD PTR [rdx]
        xor r12, r12
        and r13, 1
        neg r13
        ; r = a + (m AND mask), two words at a time.
        mov rax, QWORD PTR [r8]
        mov r9, QWORD PTR [r8+8]
        mov r10, QWORD PTR [rdx]
        mov r11, QWORD PTR [rdx+8]
        pext rax, rax, r13
        pext r9, r9, r13
        add r10, rax
        adc r11, r9
        mov QWORD PTR [rcx], r10
        mov QWORD PTR [rcx+8], r11
        mov rax, QWORD PTR [r8+16]
        mov r9, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        pext rax, rax, r13
        pext r9, r9, r13
        adc r10, rax
        adc r11, r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [r8+32]
        mov r9, QWORD PTR [r8+40]
        mov r10, QWORD PTR [rdx+32]
        mov r11, QWORD PTR [rdx+40]
        pext rax, rax, r13
        pext r9, r9, r13
        adc r10, rax
        adc r11, r9
        mov QWORD PTR [rcx+32], r10
        mov QWORD PTR [rcx+40], r11
        adc r12, 0
        ; Shift the 7-word value (r12 : r[5..0]) right by one bit.
        mov r10, QWORD PTR [rcx]
        mov r11, QWORD PTR [rcx+8]
        shrd r10, r11, 1
        mov QWORD PTR [rcx], r10
        mov r10, QWORD PTR [rcx+16]
        shrd r11, r10, 1
        mov QWORD PTR [rcx+8], r11
        mov r11, QWORD PTR [rcx+24]
        shrd r10, r11, 1
        mov QWORD PTR [rcx+16], r10
        mov r10, QWORD PTR [rcx+32]
        shrd r11, r10, 1
        mov QWORD PTR [rcx+24], r11
        mov r11, QWORD PTR [rcx+40]
        shrd r10, r11, 1
        mov QWORD PTR [rcx+32], r10
        shrd r11, r12, 1
        mov QWORD PTR [rcx+40], r11
        pop r13
        pop r12
        ret
sp_384_mont_div2_avx2_6 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Entries are 96 bytes (two 48-byte halves). Entries 1..63 are all read and
; * masked, so the access pattern does not depend on idx. The halves are
; * written to r at byte offsets 0 and 96. If idx matches none of 1..63 the
; * output is all zero.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access (rdx)
; * idx    Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_6 PROC
        ; Spill xmm6-xmm15 (callee-saved under the Microsoft x64 convention).
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15
        ; From entry 1
        mov rax, 1
        movd xmm13, r8d
        add rdx, 96
        movd xmm15, eax
        ; 63 entries remain after skipping entry 0.
        mov rax, 63
        ; xmm15 = 1 in all lanes, xmm13 = idx in all lanes.
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        pxor xmm4, xmm4
        pxor xmm5, xmm5
        ; xmm14 = running entry index, starting at 1.
        movdqa xmm14, xmm15
L_384_get_entry_64_6_start_0:
        ; xmm12 = all ones when the running index equals idx, otherwise zero.
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu xmm6, OWORD PTR [rdx]
        movdqu xmm7, OWORD PTR [rdx+16]
        movdqu xmm8, OWORD PTR [rdx+32]
        movdqu xmm9, OWORD PTR [rdx+48]
        movdqu xmm10, OWORD PTR [rdx+64]
        movdqu xmm11, OWORD PTR [rdx+80]
        add rdx, 96
        pand xmm6, xmm12
        pand xmm7, xmm12
        pand xmm8, xmm12
        pand xmm9, xmm12
        pand xmm10, xmm12
        pand xmm11, xmm12
        por xmm0, xmm6
        por xmm1, xmm7
        por xmm2, xmm8
        por xmm3, xmm9
        por xmm4, xmm10
        por xmm5, xmm11
        dec rax
        jnz L_384_get_entry_64_6_start_0
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+32], xmm2
        movdqu OWORD PTR [rcx+96], xmm3
        movdqu OWORD PTR [rcx+112], xmm4
        movdqu OWORD PTR [rcx+128], xmm5
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        ret
sp_384_get_entry_64_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * AVX2 variant of sp_384_get_entry_64_6: entries 1..63 (96 bytes each) are
; * all read and masked, and the two 48-byte halves are written to r at byte
; * offsets 0 and 96.
; *
; * r      Point to copy into. (rcx)
; * table  Table - start of the entries to access (rdx)
; * idx    Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_avx2_6 PROC
        ; Spill xmm6-xmm11 (callee-saved under the Microsoft x64 convention).
        sub rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        ; From entry 1
        mov rax, 1
        movd xmm9, r8d
        add rdx, 96
        movd xmm11, eax
        ; 63 entries remain after skipping entry 0. A count of 64 would read
        ; one whole entry past the end of a 64-entry table.
        mov rax, 63
        ; With an all-zero index vector, vpermd copies dword 0 to every lane:
        ; ymm9 = idx in all lanes, ymm11 = 1 in all lanes.
        vpxor ymm10, ymm10, ymm10
        vpermd ymm9, ymm10, ymm9
        vpermd ymm11, ymm10, ymm11
        vpxor ymm0, ymm0, ymm0
        vpxor xmm1, xmm1, xmm1
        vpxor ymm2, ymm2, ymm2
        vpxor xmm3, xmm3, xmm3
        ; ymm10 = running entry index, starting at 1.
        vmovdqa ymm10, ymm11
L_384_get_entry_64_avx2_6_start:
        ; ymm8 = all ones when the running index equals idx, otherwise zero.
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add rdx, 96
        vpand ymm4, ymm4, ymm8
        vpand xmm5, xmm5, xmm8
        vpand ymm6, ymm6, ymm8
        vpand xmm7, xmm7, xmm8
        vpor ymm0, ymm0, ymm4
        vpor xmm1, xmm1, xmm5
        vpor ymm2, ymm2, ymm6
        vpor xmm3, xmm3, xmm7
        dec rax
        jnz L_384_get_entry_64_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add rsp, 96
        ret
sp_384_get_entry_64_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_6 PROC
        ; rcx = r, rdx = table, r8d = idx.
        ; Entries are 96 bytes; entries 1..64 are all read and masked so the
        ; access pattern is independent of idx. The two 48-byte halves land
        ; in r at byte offsets 0 and 96.

        ; Spill xmm6-xmm15 (callee-saved under the Microsoft x64 convention).
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15

        ; Result accumulators start out as zero.
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        pxor xmm4, xmm4
        pxor xmm5, xmm5

        ; xmm15 = 1 in every lane, xmm13 = idx in every lane,
        ; xmm14 = running entry index (starts at 1).
        mov rax, 1
        movd xmm15, eax
        movd xmm13, r8d
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        movdqa xmm14, xmm15

        ; Skip entry 0 and visit the remaining 64 entries.
        add rdx, 96
        mov rax, 64
L_384_get_entry_65_6_scan:
        ; xmm12 = all ones when the running index equals idx, otherwise zero.
        movdqa xmm12, xmm14
        pcmpeqd xmm12, xmm13
        paddd xmm14, xmm15
        ; Load, mask and merge each 16-byte slice of the entry.
        movdqu xmm6, OWORD PTR [rdx]
        pand xmm6, xmm12
        por xmm0, xmm6
        movdqu xmm7, OWORD PTR [rdx+16]
        pand xmm7, xmm12
        por xmm1, xmm7
        movdqu xmm8, OWORD PTR [rdx+32]
        pand xmm8, xmm12
        por xmm2, xmm8
        movdqu xmm9, OWORD PTR [rdx+48]
        pand xmm9, xmm12
        por xmm3, xmm9
        movdqu xmm10, OWORD PTR [rdx+64]
        pand xmm10, xmm12
        por xmm4, xmm10
        movdqu xmm11, OWORD PTR [rdx+80]
        pand xmm11, xmm12
        por xmm5, xmm11
        add rdx, 96
        dec rax
        jnz L_384_get_entry_65_6_scan

        ; Store the selected entry.
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+32], xmm2
        movdqu OWORD PTR [rcx+96], xmm3
        movdqu OWORD PTR [rcx+112], xmm4
        movdqu OWORD PTR [rcx+128], xmm5

        ; Restore the spilled registers.
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        ret
sp_384_get_entry_65_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of entry to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_avx2_6 PROC
        ; rcx = r, rdx = table, r8d = idx.
        ; Entries are 96 bytes; entries 1..64 are all read and masked so the
        ; access pattern is independent of idx. The two 48-byte halves land
        ; in r at byte offsets 0 and 96.

        ; Spill xmm6-xmm11 (callee-saved under the Microsoft x64 convention).
        sub rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        ; From entry 1
        mov rax, 1
        movd xmm9, r8d
        add rdx, 96
        movd xmm11, eax
        ; 64 entries remain after skipping entry 0. A count of 65 would read
        ; one whole entry past the end of a 65-entry table.
        mov rax, 64
        ; With an all-zero index vector, vpermd copies dword 0 to every lane:
        ; ymm9 = idx in all lanes, ymm11 = 1 in all lanes.
        vpxor ymm10, ymm10, ymm10
        vpermd ymm9, ymm10, ymm9
        vpermd ymm11, ymm10, ymm11
        vpxor ymm0, ymm0, ymm0
        vpxor xmm1, xmm1, xmm1
        vpxor ymm2, ymm2, ymm2
        vpxor xmm3, xmm3, xmm3
        ; ymm10 = running entry index, starting at 1.
        vmovdqa ymm10, ymm11
L_384_get_entry_65_avx2_6_start:
        ; ymm8 = all ones when the running index equals idx, otherwise zero.
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd ymm10, ymm10, ymm11
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovdqu xmm5, OWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+48]
        vmovdqu xmm7, OWORD PTR [rdx+80]
        add rdx, 96
        vpand ymm4, ymm4, ymm8
        vpand xmm5, xmm5, xmm8
        vpand ymm6, ymm6, ymm8
        vpand xmm7, xmm7, xmm8
        vpor ymm0, ymm0, ymm4
        vpor xmm1, xmm1, xmm5
        vpor ymm2, ymm2, ymm6
        vpor xmm3, xmm3, xmm7
        dec rax
        jnz L_384_get_entry_65_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add rsp, 96
        ret
sp_384_get_entry_65_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
; *
; * The carry is propagated through all six 64-bit words; a carry out of the
; * top word is discarded.
; *
; * a  A single precision integer. (rcx)
; */
_text SEGMENT READONLY PARA
sp_384_add_one_6 PROC
        add QWORD PTR [rcx], 1
        adc QWORD PTR [rcx+8], 0
        adc QWORD PTR [rcx+16], 0
        adc QWORD PTR [rcx+24], 0
        adc QWORD PTR [rcx+32], 0
        adc QWORD PTR [rcx+40], 0
        ret
sp_384_add_one_6 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert
; * a     Byte array.
; * n     Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_bswap PROC
        ; rcx = r (write cursor), r8 = a, r9 = n (bytes still to convert).
        ; The size argument in rdx is not read; the output is always padded
        ; out to 48 bytes.
        push r12
        push r13
        ; r11 = one past the last input byte; words are consumed from the end.
        lea r11, [r8+r9]
        ; r12 = one past the last output word.
        lea r12, [rcx+48]
        ; r13 = constant zero.
        xor r13d, r13d

        ; Convert 64 input bytes (eight words) per pass while at least 64 remain.
L_384_from_bin_bswap_blk64_test:
        cmp r9, 63
        jle L_384_from_bin_bswap_blk8_test
        sub r11, 64
        mov rax, QWORD PTR [r11+56]
        mov r10, QWORD PTR [r11+48]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov rax, QWORD PTR [r11+40]
        mov r10, QWORD PTR [r11+32]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        mov rax, QWORD PTR [r11+24]
        mov r10, QWORD PTR [r11+16]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov rax, QWORD PTR [r11+8]
        mov r10, QWORD PTR [r11]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
        jmp L_384_from_bin_bswap_blk64_test

        ; Convert one word at a time while at least 8 bytes remain.
L_384_from_bin_bswap_blk8_test:
        cmp r9, 7
        jle L_384_from_bin_bswap_tail
        sub r11, 8
        mov rax, QWORD PTR [r11]
        bswap rax
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
        jmp L_384_from_bin_bswap_blk8_test

        ; 1..7 leading bytes left: build the top partial word byte by byte,
        ; reading forward from the start of the array.
L_384_from_bin_bswap_tail:
        test r9, r9
        jz L_384_from_bin_bswap_pad_test
        mov r10, r13
        mov rax, r13
L_384_from_bin_bswap_tail_byte:
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_384_from_bin_bswap_tail_byte
        mov QWORD PTR [rcx], r10
        add rcx, 8

        ; Zero any output words that were not written.
L_384_from_bin_bswap_pad_test:
        cmp rcx, r12
        jge L_384_from_bin_bswap_done
        mov QWORD PTR [rcx], r13
        add rcx, 8
        jmp L_384_from_bin_bswap_pad_test
L_384_from_bin_bswap_done:
        pop r13
        pop r12
        ret
sp_384_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert
; * a     Byte array.
; * n     Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_movbe PROC
        ; rcx = r (write cursor), r8 = a, r9 = n (bytes still to convert).
        ; The size argument in rdx is not read; the output is always padded
        ; out to 48 bytes.
        push r12
        ; r11 = one past the last input byte; words are consumed from the end.
        lea r11, [r8+r9]
        ; r12 = one past the last output word.
        lea r12, [rcx+48]

        ; Convert 64 input bytes (eight words) per pass while at least 64 remain.
L_384_from_bin_movbe_blk64_test:
        cmp r9, 63
        jle L_384_from_bin_movbe_blk8_test
        sub r11, 64
        movbe rax, QWORD PTR [r11+56]
        movbe r10, QWORD PTR [r11+48]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        movbe rax, QWORD PTR [r11+40]
        movbe r10, QWORD PTR [r11+32]
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        movbe rax, QWORD PTR [r11+24]
        movbe r10, QWORD PTR [r11+16]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        movbe rax, QWORD PTR [r11+8]
        movbe r10, QWORD PTR [r11]
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
        jmp L_384_from_bin_movbe_blk64_test

        ; Convert one word at a time while at least 8 bytes remain.
L_384_from_bin_movbe_blk8_test:
        cmp r9, 7
        jle L_384_from_bin_movbe_tail
        sub r11, 8
        movbe rax, QWORD PTR [r11]
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
        jmp L_384_from_bin_movbe_blk8_test

        ; 1..7 leading bytes left: build the top partial word byte by byte,
        ; reading forward from the start of the array.
L_384_from_bin_movbe_tail:
        test r9, r9
        jz L_384_from_bin_movbe_pad_test
        xor r10d, r10d
        xor eax, eax
L_384_from_bin_movbe_tail_byte:
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_384_from_bin_movbe_tail_byte
        mov QWORD PTR [rcx], r10
        add rcx, 8

        ; Zero any output words that were not written.
L_384_from_bin_movbe_pad_test:
        cmp rcx, r12
        jge L_384_from_bin_movbe_done
        mov QWORD PTR [rcx], 0
        add rcx, 8
        jmp L_384_from_bin_movbe_pad_test
L_384_from_bin_movbe_done:
        pop r12
        ret
sp_384_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the bswap instruction.
; *
; * r  A single precision integer.
; * a  Byte array.
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_bswap_6 PROC
        ; rcx = r (six 64-bit words, least significant first), rdx = a.
        ; The most significant word is emitted first, byte-reversed.
        ; Words 5 and 4 -> bytes 0..15
        mov rax, QWORD PTR [rcx+40]
        bswap rax
        mov r8, QWORD PTR [rcx+32]
        bswap r8
        mov QWORD PTR [rdx], rax
        mov QWORD PTR [rdx+8], r8
        ; Words 3 and 2 -> bytes 16..31
        mov rax, QWORD PTR [rcx+24]
        bswap rax
        mov r8, QWORD PTR [rcx+16]
        bswap r8
        mov QWORD PTR [rdx+16], rax
        mov QWORD PTR [rdx+24], r8
        ; Words 1 and 0 -> bytes 32..47
        mov rax, QWORD PTR [rcx+8]
        bswap rax
        mov r8, QWORD PTR [rcx]
        bswap r8
        mov QWORD PTR [rdx+32], rax
        mov QWORD PTR [rdx+40], r8
        ret
sp_384_to_bin_bswap_6 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the movbe instruction which is optional.
; *
; * r  A single precision integer. (rcx)
; * a  Byte array. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_movbe_6 PROC
        ; MOVBE performs the byte reversal as part of each load.
        ; Words 5 and 4 -> bytes 0..15
        movbe rax, QWORD PTR [rcx+40]
        movbe r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rdx], rax
        mov QWORD PTR [rdx+8], r8
        ; Words 3 and 2 -> bytes 16..31
        movbe rax, QWORD PTR [rcx+24]
        movbe r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rdx+16], rax
        mov QWORD PTR [rdx+24], r8
        ; Words 1 and 0 -> bytes 32..47
        movbe rax, QWORD PTR [rcx+8]
        movbe r8, QWORD PTR [rcx]
        mov QWORD PTR [rdx+32], rax
        mov QWORD PTR [rdx+40], r8
        ret
sp_384_to_bin_movbe_6 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * Returns 0 in rax when there was no borrow out of the top word, -1 when
; * there was.
; *
; * a  A single precision integer and result. (rcx)
; * b  A single precision integer. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_sub_in_place_6 PROC
        ; Read all six words of b before a is modified. Only volatile
        ; registers are used; rdx is loaded last because it holds the
        ; pointer to b until then.
        mov r8, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov rax, QWORD PTR [rdx+32]
        mov rdx, QWORD PTR [rdx+40]
        ; Subtract with borrow directly in memory.
        sub QWORD PTR [rcx], r8
        sbb QWORD PTR [rcx+8], r9
        sbb QWORD PTR [rcx+16], r10
        sbb QWORD PTR [rcx+24], r11
        sbb QWORD PTR [rcx+32], rax
        sbb QWORD PTR [rcx+40], rdx
        ; rax = 0 or -1 depending on the final borrow.
        sbb rax, rax
        ret
sp_384_sub_in_place_6 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_6 PROC
        ; rcx = r, rdx = a, r8 = b.
        ; Seven words are written: r[0..5] plus the final carry word r[6].
        ; r10/r11/r12 rotate as a three-word accumulator.
        push r12
        ; MUL overwrites rdx, so keep the pointer to a in r9.
        mov r9, rdx
        ; A[0] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9]
        mov r10, rax
        mov r11, rdx
        mov QWORD PTR [rcx], r10
        ; A[1] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+8]
        add r11, rax
        mov QWORD PTR [rcx+8], r11
        adc r12, rdx
        adc r10, 0
        ; A[2] * B
        mov rax, r8
        xor r11, r11
        mul QWORD PTR [r9+16]
        add r12, rax
        mov QWORD PTR [rcx+16], r12
        adc r10, rdx
        adc r11, 0
        ; A[3] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9+24]
        add r10, rax
        mov QWORD PTR [rcx+24], r10
        adc r11, rdx
        adc r12, 0
        ; A[4] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+32]
        add r11, rax
        mov QWORD PTR [rcx+32], r11
        adc r12, rdx
        adc r10, 0
        ; A[5] * B
        mov rax, r8
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        mov QWORD PTR [rcx+40], r12
        ; Top (seventh) word of the product.
        mov QWORD PTR [rcx+48], r10
        pop r12
        ret
sp_384_mul_d_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * Uses MULX with the ADCX (CF) and ADOX (OF) carry chains.
; * Seven words are written: r[0..5] plus the final carry word r[6].
; *
; * r  A single precision integer. (rcx)
; * a  A single precision integer. (rdx)
; * b  A single precision digit. (r8)
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_avx2_6 PROC
        push r12
        push r13
        ; rdx is the implicit MULX multiplicand, so move the pointer to a into rax.
        mov rax, rdx
        ; A[0] * B
        mov rdx, r8
        ; r13 = 0; the xor also clears CF and OF so both chains start at zero.
        xor r13, r13
        mulx r12, r11, QWORD PTR [rax]
        mov QWORD PTR [rcx], r11
        ; A[1] * B
        mulx r10, r9, QWORD PTR [rax+8]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+8], r12
        ; A[2] * B
        mulx r10, r9, QWORD PTR [rax+16]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+16], r11
        ; A[3] * B
        mulx r10, r9, QWORD PTR [rax+24]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+24], r12
        ; A[4] * B
        mulx r10, r9, QWORD PTR [rax+32]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+32], r11
        ; A[5] * B
        mulx r10, r9, QWORD PTR [rax+40]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        ; Fold the last CF carry into the top word.
        adcx r11, r13
        mov QWORD PTR [rcx+40], r12
        mov QWORD PTR [rcx+48], r11
        pop r13
        pop r12
        ret
sp_384_mul_d_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the result of the division.
; */
_text SEGMENT READONLY PARA
div_384_word_asm_6 PROC
        ; rcx = d1, rdx = d0, r8 = div.
        ; DIV expects the 128-bit dividend in rdx:rax and leaves the quotient
        ; in rax. It faults if div is zero or the quotient does not fit in
        ; 64 bits.
        mov rax, rdx
        mov rdx, rcx
        div r8
        ret
div_384_word_asm_6 ENDP
_text ENDS
ENDIF
; /* Shift number right by 1 bit. (r = a >> 1)
; *
; * r  Result of right shift by 1. (rcx)
; * a  Number to shift. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_rshift1_6 PROC
        ; Load all six words first. Only volatile registers are used; rdx is
        ; loaded last because it holds the pointer to a until then.
        mov rax, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        mov r11, QWORD PTR [rdx+32]
        mov rdx, QWORD PTR [rdx+40]
        ; Each word takes its new top bit from the next higher word; the top
        ; word is filled with a zero bit.
        shrd rax, r8, 1
        shrd r8, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, rdx, 1
        shr rdx, 1
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r8
        mov QWORD PTR [rcx+16], r9
        mov QWORD PTR [rcx+24], r10
        mov QWORD PTR [rcx+32], r11
        mov QWORD PTR [rcx+40], rdx
        ret
sp_384_rshift1_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
; *
; * r  Result of division by 2.
; * a  Number to divide.
; * m  Modulus
; */
_text SEGMENT READONLY PARA
sp_384_div2_mod_6 PROC
        ; rcx = r, rdx = a, r8 = m.
        ; When a is odd, m is added before shifting right by one. This
        ; routine branches on the low bit of a.
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        ; a -> rax, r9, r10, r11, r12, r13
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        mov r13, QWORD PTR [rdx+40]
        ; m -> r14, r15, rdi, rsi, rbx, rbp
        mov r14, QWORD PTR [r8]
        mov r15, QWORD PTR [r8+8]
        mov rdi, QWORD PTR [r8+16]
        mov rsi, QWORD PTR [r8+24]
        mov rbx, QWORD PTR [r8+32]
        mov rbp, QWORD PTR [r8+40]
        ; r8 = low bit of a. When a is even, r8 stays 0 and acts as the
        ; (zero) seventh word for the shift below.
        mov r8, rax
        and r8, 1
        je L_384_mod_inv_6_div2_mod_no_add
        add rax, r14
        adc r9, r15
        adc r10, rdi
        adc r11, rsi
        adc r12, rbx
        adc r13, rbp
        ; MOV leaves the flags untouched, so the carry from the addition
        ; survives and is captured in r8 by the ADC.
        mov r8, 0
        adc r8, 0
L_384_mod_inv_6_div2_mod_no_add:
        ; Shift the 7-word value (r8 : r13 .. rax) right by one bit.
        shrd rax, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        shrd r12, r13, 1
        shrd r13, r8, 1
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r13
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_384_div2_mod_6 ENDP
_text ENDS
; /* Count the significant bits in a.
; *
; * Words are examined from the most significant (byte offset 40) down to the
; * least significant. For the first non-zero word found, the result is the
; * index of its highest set bit plus one plus 64 times the word index.
; *
; * a  A single precision integer of six 64-bit words. (rcx)
; * returns the number of significant bits in rax; 0 when every word is zero.
; */
_text SEGMENT READONLY PARA
sp_384_num_bits_6 PROC
        ; Result when every word is zero.
        xor rax, rax
        mov rdx, QWORD PTR [rcx+40]
        cmp rdx, 0
        je L_384_num_bits_6_end_320
        mov rax, -1
        ; rax = index of the highest set bit (rdx is known to be non-zero).
        bsr rax, rdx
        add rax, 321
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_320:
        mov rdx, QWORD PTR [rcx+32]
        cmp rdx, 0
        je L_384_num_bits_6_end_256
        mov rax, -1
        bsr rax, rdx
        add rax, 257
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_256:
        mov rdx, QWORD PTR [rcx+24]
        cmp rdx, 0
        je L_384_num_bits_6_end_192
        mov rax, -1
        bsr rax, rdx
        add rax, 193
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_192:
        mov rdx, QWORD PTR [rcx+16]
        cmp rdx, 0
        je L_384_num_bits_6_end_128
        mov rax, -1
        bsr rax, rdx
        add rax, 129
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_128:
        mov rdx, QWORD PTR [rcx+8]
        cmp rdx, 0
        je L_384_num_bits_6_end_64
        mov rax, -1
        bsr rax, rdx
        add rax, 65
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_64:
        mov rdx, QWORD PTR [rcx]
        cmp rdx, 0
        je L_384_num_bits_6_end_0
        mov rax, -1
        bsr rax, rdx
        add rax, 1
        jmp L_384_num_bits_6_done
L_384_num_bits_6_end_0:
L_384_num_bits_6_done:
        ret
sp_384_num_bits_6 ENDP
_text ENDS
ENDIF
IFDEF WOLFSSL_SP_521
; /* Multiply a and b into r. (r = a * b)
; *
; * Schoolbook 9 x 9 word multiplication producing 18 words. Products are
; * summed one result column at a time in a three-word accumulator that
; * rotates through r10, r11 and r12. The nine low result words are staged on
; * the stack and copied to r only at the end; the nine high words are
; * written to r directly.
; * NOTE(review): the staging presumably allows r to alias a or b - confirm
; * against callers.
; *
; * r  A single precision integer. (rcx)
; * a  A single precision integer. (rdx, kept in r9)
; * b  A single precision integer. (r8)
; */
_text SEGMENT READONLY PARA
sp_521_mul_9 PROC
        push r12
        ; MUL overwrites rdx, so keep the pointer to a in r9.
        mov r9, rdx
        ; 72 bytes of stack for result words 0..8.
        sub rsp, 72
        ; A[0] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9]
        xor r12, r12
        mov QWORD PTR [rsp], rax
        mov r11, rdx
        ; A[0] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+8], r11
        ; A[0] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+16], r12
        ; A[0] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[1] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+8]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rsp+24], r10
        ; A[0] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[2] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+16]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[4] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+32]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+32], r11
        ; A[0] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[3] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+24]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[4] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+32]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+40], r12
        ; A[0] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[1] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+8]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[4] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+32]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[5] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+40]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[6] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+48]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rsp+48], r10
        ; A[0] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[1] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+8]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[2] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+16]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[4] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+32]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[5] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+40]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[6] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+48]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[7] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+56]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rsp+56], r11
        ; A[0] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+8]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[2] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+16]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[3] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+24]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[4] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+32]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[6] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+48]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[7] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+56]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[8] * B[0]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r9+64]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+64], r12
        ; From here on the result words go straight to r (words 9..17).
        ; A[1] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+8]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[2] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+16]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[3] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+24]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[4] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+32]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[5] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+40]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[6] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+48]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[7] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+56]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[8] * B[1]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r9+64]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rcx+72], r10
        ; A[2] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+16]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[3] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+24]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[4] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+32]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[5] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+40]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[6] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+48]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[7] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+56]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[8] * B[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r9+64]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rcx+80], r11
        ; A[3] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+24]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[4] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+32]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+40]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[6] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+48]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[7] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+56]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[8] * B[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r9+64]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+88], r12
        ; A[4] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+32]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[5] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+40]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[6] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+48]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[7] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+56]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[8] * B[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r9+64]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rcx+96], r10
        ; A[5] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+40]
        xor r10, r10
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[6] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+48]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[7] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+56]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        ; A[8] * B[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r9+64]
        add r11, rax
        adc r12, rdx
        adc r10, 0
        mov QWORD PTR [rcx+104], r11
        ; A[6] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+48]
        xor r11, r11
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[7] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+56]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        ; A[8] * B[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r9+64]
        add r12, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+112], r12
        ; A[7] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+56]
        xor r12, r12
        add r10, rax
        adc r11, rdx
        adc r12, 0
        ; A[8] * B[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r9+64]
        add r10, rax
        adc r11, rdx
        adc r12, 0
        mov QWORD PTR [rcx+120], r10
        ; A[8] * B[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r9+64]
        add r11, rax
        adc r12, rdx
        mov QWORD PTR [rcx+128], r11
        mov QWORD PTR [rcx+136], r12
        ; Copy the staged low nine words from the stack into r[0..8].
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r10, QWORD PTR [rsp+16]
        mov r11, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [rsp+32]
        mov rdx, QWORD PTR [rsp+40]
        mov r10, QWORD PTR [rsp+48]
        mov r11, QWORD PTR [rsp+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], rdx
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        mov rax, QWORD PTR [rsp+64]
        mov QWORD PTR [rcx+64], rax
        add rsp, 72
        pop r12
        ret
sp_521_mul_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r  Result of multiplication.
; * a  First number to multiply.
; * b  Second number to multiply.
; */
; Register use in sp_521_mul_avx2_9 (arguments arrive in rcx, rdx, r8,
; matching the r, a, b parameter order documented above):
;   r9  = a (copied from rdx, because rdx becomes the mulx multiplicand)
;   rbp = b (copied from r8)
;   r8  = r (copied from rcx), then advanced by 72 so [r8+8*k] is r[9+k]
;   rbx = where result words 0..8 are written: r itself, or the 72-byte
;         stack buffer at rsp when r is the same pointer as a or as b
;   r15 = constant zero
;   r14 = carry word handed from the end of one row to the end of the next
;   r10..r13 = running result words, rcx:rax = current mulx product
; Row i adds A[i] * B[0..8] into result words i..i+9. From row 1 on, the
; low product half is added with adcx (CF chain) and the high half with
; adox (OF chain), so the instruction order below must not be changed.
; NOTE(review): this PROC saves nonvolatile registers and moves rsp but is
; declared without FRAME/unwind directives - confirm that is intentional.
_text SEGMENT READONLY PARA
sp_521_mul_avx2_9 PROC
        push rbx
        push rbp
        push r12
        push r13
        push r14
        push r15
        mov rbp, r8
        mov r8, rcx
        mov r9, rdx
        sub rsp, 72
        ; rbx = r unless r == a or r == b, in which case rbx = rsp (temp)
        cmp r9, r8
        mov rbx, rsp
        cmovne rbx, r8
        cmp rbp, r8
        cmove rbx, rsp
        add r8, 72
        ; r15 = 0; this also clears CF and OF before the carry chains start
        xor r15, r15
        ; Row 0: A[0] * B[0..8] initialises result words 0..9
        mov rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx r12, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx], r10
        adcx r11, rax
        ; A[0] * B[2]
        mulx r13, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+8], r11
        adcx r12, rax
        mov QWORD PTR [rbx+16], r12
        ; A[0] * B[3]
        mulx r10, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        ; A[0] * B[4]
        mulx r11, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        ; A[0] * B[5]
        mulx r12, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        mov QWORD PTR [rbx+40], r11
        ; A[0] * B[6]
        mulx r13, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        ; A[0] * B[7]
        mulx r10, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        ; A[0] * B[8]
        mulx r11, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adcx r11, r15
        ; r14 = carry out of row 0
        mov r14, r15
        adcx r14, r15
        mov QWORD PTR [rbx+64], r10
        mov QWORD PTR [r8], r11
        ; Row 1: add A[1] * B[0..8] into result words 1..10
        mov rdx, QWORD PTR [r9+8]
        mov r11, QWORD PTR [rbx+8]
        mov r12, QWORD PTR [rbx+16]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        ; A[1] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+8], r11
        adcx r12, rax
        adox r13, rcx
        ; A[1] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+16], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+24], r13
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        ; A[1] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rbx+48], r12
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        ; A[1] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r13, rax
        adox r10, rcx
        ; A[1] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rbx+64], r10
        mov r12, r15
        adcx r11, rax
        adox r12, rcx
        ; fold in the previous row's carry, then collect this row's carry in r14
        adcx r12, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8], r11
        mov QWORD PTR [r8+8], r12
        ; Row 2: add A[2] * B[0..8] into result words 2..11
        mov rdx, QWORD PTR [r9+16]
        mov r12, QWORD PTR [rbx+16]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        ; A[2] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r13, rcx
        ; A[2] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+16], r12
        adcx r13, rax
        adox r10, rcx
        ; A[2] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+32], r10
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        ; A[2] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        ; A[2] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+56], r13
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[2] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[2] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8], r11
        mov r13, r15
        adcx r12, rax
        adox r13, rcx
        adcx r13, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+8], r12
        mov QWORD PTR [r8+16], r13
        ; Row 3: add A[3] * B[0..8] into result words 3..12
        mov rdx, QWORD PTR [r9+24]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        ; A[3] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r13, rax
        adox r10, rcx
        ; A[3] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        adox r11, rcx
        ; A[3] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+40], r11
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        ; A[3] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r12, rax
        adox r13, rcx
        ; A[3] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        ; A[3] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+64], r10
        mov r12, QWORD PTR [r8+8]
        mov r13, QWORD PTR [r8+16]
        ; A[3] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[3] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8], r11
        adcx r12, rax
        adox r13, rcx
        ; A[3] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+8], r12
        mov r10, r15
        adcx r13, rax
        adox r10, rcx
        adcx r10, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+16], r13
        mov QWORD PTR [r8+24], r10
        ; Row 4: add A[4] * B[0..8] into result words 4..13
        mov rdx, QWORD PTR [r9+32]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        ; A[4] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        ; A[4] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rbx+48], r12
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[4] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        adox r10, rcx
        ; A[4] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8], r11
        mov r13, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[4] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r13, rcx
        ; A[4] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+8], r12
        adcx r13, rax
        adox r10, rcx
        ; A[4] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+16], r13
        mov r11, r15
        adcx r10, rax
        adox r11, rcx
        adcx r11, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        ; Row 5: add A[5] * B[0..8] into result words 5..14
        mov rdx, QWORD PTR [r9+40]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        ; A[5] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        ; A[5] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+56], r13
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        mov r13, QWORD PTR [r8+16]
        ; A[5] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r8+8], r12
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[5] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r13, rax
        adox r10, rcx
        ; A[5] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+16], r13
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+24], r10
        mov r12, r15
        adcx r11, rax
        adox r12, rcx
        adcx r12, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+32], r11
        mov QWORD PTR [r8+40], r12
        ; Row 6: add A[6] * B[0..8] into result words 6..15
        mov rdx, QWORD PTR [r9+48]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        ; A[6] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r13, rcx
        ; A[6] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        ; A[6] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+64], r10
        mov r12, QWORD PTR [r8+8]
        mov r13, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[6] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [r8], r11
        adcx r12, rax
        adox r13, rcx
        ; A[6] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+8], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r13
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[6] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[6] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+24], r10
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+32], r11
        mov r13, r15
        adcx r12, rax
        adox r13, rcx
        adcx r13, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+40], r12
        mov QWORD PTR [r8+48], r13
        ; Row 7: add A[7] * B[0..8] into result words 7..16
        mov rdx, QWORD PTR [r9+56]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[7] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r13, rax
        adox r10, rcx
        ; A[7] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[7] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8], r11
        mov r13, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[7] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r12, rax
        adox r13, rcx
        ; A[7] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [r8+8], r12
        adcx r13, rax
        adox r10, rcx
        ; A[7] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+16], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r10
        mov r12, QWORD PTR [r8+40]
        mov r13, QWORD PTR [r8+48]
        ; A[7] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[7] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+32], r11
        adcx r12, rax
        adox r13, rcx
        ; A[7] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+40], r12
        mov r10, r15
        adcx r13, rax
        adox r10, rcx
        adcx r10, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [r8+48], r13
        mov QWORD PTR [r8+56], r10
        ; Row 8: add A[8] * B[0..8] into result words 8..17
        mov rdx, QWORD PTR [r9+64]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        mov r13, QWORD PTR [r8+16]
        ; A[8] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[8] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [r8], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r8+8], r12
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[8] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        adox r10, rcx
        ; A[8] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [r8+16], r13
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [r8+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+32], r11
        mov r13, QWORD PTR [r8+48]
        mov r10, QWORD PTR [r8+56]
        ; A[8] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r13, rcx
        ; A[8] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [r8+40], r12
        adcx r13, rax
        adox r10, rcx
        ; A[8] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [r8+48], r13
        mov r11, r15
        adcx r10, rax
        adox r11, rcx
        adcx r11, r14
        mov QWORD PTR [r8+56], r10
        mov QWORD PTR [r8+64], r11
        ; Restore r8 = r. If r is the same pointer as a or b, words 0..8 were
        ; staged in the stack buffer (rbx == rsp) and are copied to r now.
        sub r8, 72
        cmp r9, r8
        je L_start_521_mul_avx2_9
        cmp rbp, r8
        jne L_end_521_mul_avx2_9
L_start_521_mul_avx2_9:
        ; copy 72 bytes: four 16-byte moves plus one 8-byte move
        vmovdqu xmm0, OWORD PTR [rbx]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+16]
        vmovups OWORD PTR [r8+16], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+32]
        vmovups OWORD PTR [r8+32], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+48]
        vmovups OWORD PTR [r8+48], xmm0
        mov rax, QWORD PTR [rbx+64]
        mov QWORD PTR [r8+64], rax
L_end_521_mul_avx2_9:
        ; release the temp buffer and restore registers in reverse push order
        add rsp, 72
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        pop rbx
        ret
sp_521_mul_avx2_9 ENDP
_text ENDS
ENDIF
; /* Square a and put result in r.
; (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
; Register use in sp_521_sqr_9 (arguments arrive in rcx and rdx, matching
; the r, a parameter order documented above):
;   r8  = a (copied from rdx, because mul overwrites rdx:rax)
;   rcx = r
;   r9, r10, r11 = rotating three-word accumulator for the current column
;   r12, r13, r14 = sum of the cross products of one column; used for
;         result words 5..11, where the sum is doubled once instead of
;         adding every cross product twice
; Result words 0..8 are staged at [rsp]..[rsp+64] and copied to r after the
; last read of a; words 9..17 are stored straight to [rcx+72]..[rcx+136].
; Presumably the staging is what lets r be the same buffer as a - confirm
; against callers.
_text SEGMENT READONLY PARA
sp_521_sqr_9 PROC
        push r12
        push r13
        push r14
        mov r8, rdx
        sub rsp, 72
        ; A[0] * A[0]
        mov rax, QWORD PTR [r8]
        mul rax
        xor r11, r11
        mov QWORD PTR [rsp], rax
        mov r10, rdx
        ; A[0] * A[1]
        ; (each cross product A[i] * A[j], i != j, is added twice)
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+8], r10
        ; A[0] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[1] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rsp+16], r11
        ; A[0] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8+8]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+24], r9
        ; A[0] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[1] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+8]
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[2] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul rax
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+32], r10
        ; From here to result word 11 the cross products of a column are
        ; summed in r14:r13:r12, doubled, and then added to the accumulator.
        ; A[0] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8]
        xor r10, r10
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[1] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+8]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[2] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r11, r12
        adc r9, r13
        adc r10, r14
        mov QWORD PTR [rsp+40], r11
        ; A[0] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8]
        xor r11, r11
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[1] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+8]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[2] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[3] * A[3]
        ; (the square term is added once, after the cross-product sum is doubled)
        mov rax, QWORD PTR [r8+24]
        mul rax
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r9, r12
        adc r10, r13
        adc r11, r14
        mov QWORD PTR [rsp+48], r9
        ; A[0] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8]
        xor r9, r9
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[1] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8+8]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[2] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[3] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul QWORD PTR [r8+24]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r10, r12
        adc r11, r13
        adc r9, r14
        mov QWORD PTR [rsp+56], r10
        ; A[0] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8]
        xor r10, r10
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[1] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+8]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[2] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[3] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+24]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[4] * A[4]
        mov rax, QWORD PTR [r8+32]
        mul rax
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r11, r12
        adc r9, r13
        adc r10, r14
        mov QWORD PTR [rsp+64], r11
        ; Result words 9 and up are written directly to r.
        ; A[1] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+8]
        xor r11, r11
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[2] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+16]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[3] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8+24]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[4] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul QWORD PTR [r8+32]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r9, r12
        adc r10, r13
        adc r11, r14
        mov QWORD PTR [rcx+72], r9
        ; A[2] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+16]
        xor r9, r9
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[3] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+24]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[4] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8+32]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[5] * A[5]
        mov rax, QWORD PTR [r8+40]
        mul rax
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r10, r12
        adc r11, r13
        adc r9, r14
        mov QWORD PTR [rcx+80], r10
        ; A[3] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+24]
        xor r10, r10
        xor r14, r14
        mov r12, rax
        mov r13, rdx
        ; A[4] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+32]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        ; A[5] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul QWORD PTR [r8+40]
        add r12, rax
        adc r13, rdx
        adc r14, 0
        add r12, r12
        adc r13, r13
        adc r14, r14
        add r11, r12
        adc r9, r13
        adc r10, r14
        mov QWORD PTR [rcx+88], r11
        ; The remaining columns add each cross product twice directly again.
        ; A[4] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+32]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[5] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+40]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[6] * A[6]
        mov rax, QWORD PTR [r8+48]
        mul rax
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+96], r9
        ; A[5] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+40]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[6] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul QWORD PTR [r8+48]
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rcx+104], r10
        ; A[6] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+48]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[7] * A[7]
        mov rax, QWORD PTR [r8+56]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rcx+112], r11
        ; A[7] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul QWORD PTR [r8+56]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rcx+120], r9
        ; A[8] * A[8]
        mov rax, QWORD PTR [r8+64]
        mul rax
        add r10, rax
        adc r11, rdx
        mov QWORD PTR [rcx+128], r10
        mov QWORD PTR [rcx+136], r11
        ; a is no longer read: copy the staged result words 0..8 to r
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r12, QWORD PTR [rsp+16]
        mov r13, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r12
        mov QWORD PTR [rcx+24], r13
        mov rax, QWORD PTR [rsp+32]
        mov rdx, QWORD PTR [rsp+40]
        mov r12, QWORD PTR [rsp+48]
        mov r13, QWORD PTR [rsp+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], rdx
        mov QWORD PTR [rcx+48], r12
        mov QWORD PTR [rcx+56], r13
        mov rax, QWORD PTR [rsp+64]
        mov QWORD PTR [rcx+64], rax
        add rsp, 72
        pop r14
        pop r13
        pop r12
        ret
sp_521_sqr_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
; Register use in sp_521_sqr_avx2_9 (arguments arrive in rcx and rdx,
; matching the r, a parameter order documented above):
;   r9  = a (copied from rdx, because rdx becomes the mulx multiplicand)
;   r8  = r (copied from rcx), then advanced by 72 so [r8+8*k] is r[9+k]
;   rbp = where result words 0..3 are written: r itself, or the stack
;         buffer at rsp when a and r are the same pointer
;   r14, r15, rdi, rsi, rbx = result words 4..8, kept in registers and
;         stored to [r8-40]..[r8-8] only at the end
;   r12 = constant zero, r13 = carry passed between the "Diagonal" passes
;   r10, r11 = working words, rcx:rax = current mulx product
; The four "Diagonal" passes accumulate every cross product A[i] x A[j]
; (i != j) once; "Double and Add" then doubles each word (adox x, x) and
; adds the squares A[i] x A[i] (adcx). adcx and adox form two separate
; carry chains, so the instruction order must not be changed.
; NOTE(review): the %-prefixed register names in the comments below do not
; match the registers in this listing (r8 and r9 hold pointers here); they
; are kept verbatim.
_text SEGMENT READONLY PARA
sp_521_sqr_avx2_9 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov r8, rcx
        mov r9, rdx
        sub rsp, 72
        ; rbp = r unless a == r, in which case rbp = rsp (temp buffer)
        cmp r9, r8
        mov rbp, rsp
        cmovne rbp, r8
        add r8, 72
        ; r12 = 0; this also clears CF and OF before the carry chains start
        xor r12, r12
        ; Diagonal 1
        ; Zero into %r9
        ; A[1] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx r11, r10, QWORD PTR [r9+8]
        mov QWORD PTR [rbp+8], r10
        ; Zero into %r8
        ; A[2] x A[0]
        mulx r10, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [rbp+16], r11
        ; No load %r12 - %r9
        ; A[3] x A[0]
        mulx r14, rax, QWORD PTR [r9+24]
        adcx r10, rax
        adox r14, r12
        mov QWORD PTR [rbp+24], r10
        ; No load %r13 - %r8
        ; A[4] x A[0]
        mulx r15, rax, QWORD PTR [r9+32]
        adcx r14, rax
        adox r15, r12
        ; No store %r12 - %r9
        ; No load %r14 - %r9
        ; A[5] x A[0]
        mulx rdi, rax, QWORD PTR [r9+40]
        adcx r15, rax
        adox rdi, r12
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[6] x A[0]
        mulx rsi, rax, QWORD PTR [r9+48]
        adcx rdi, rax
        adox rsi, r12
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[7] x A[0]
        mulx rbx, rax, QWORD PTR [r9+56]
        adcx rsi, rax
        adox rbx, r12
        ; No store %r15 - %r8
        ; Zero into %r8
        ; A[8] x A[0]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx rbx, rax
        adox r10, r12
        ; No store %rbx - %r9
        ; Zero into %r9
        ; A[8] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [r8], r10
        ; Carry
        adcx r11, r12
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [r8+8], r11
        ; Diagonal 2
        mov r11, QWORD PTR [rbp+24]
        ; No load %r12 - %r8
        ; A[2] x A[1]
        mulx rcx, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r14, rcx
        mov QWORD PTR [rbp+24], r11
        ; No load %r13 - %r9
        ; A[3] x A[1]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r14, rax
        adox r15, rcx
        ; No store %r12 - %r8
        ; No load %r14 - %r8
        ; A[4] x A[1]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r15, rax
        adox rdi, rcx
        ; No store %r13 - %r9
        ; No load %r15 - %r9
        ; A[5] x A[1]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rdi, rax
        adox rsi, rcx
        ; No store %r14 - %r8
        ; No load %rbx - %r8
        ; A[6] x A[1]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r9
        mov r11, QWORD PTR [r8]
        ; A[7] x A[1]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx rbx, rax
        adox r11, rcx
        ; No store %rbx - %r8
        mov r10, QWORD PTR [r8+8]
        ; A[7] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [r8], r11
        ; Zero into %r9
        ; A[7] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx r11, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [r8+8], r10
        ; Zero into %r8
        ; A[7] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r10, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [r8+16], r11
        ; Carry
        adcx r10, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [r8+24], r10
        ; Diagonal 3
        ; No load %r14 - %r9
        ; A[3] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r15, rax
        adox rdi, rcx
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[4] x A[2]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx rdi, rax
        adox rsi, rcx
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[5] x A[2]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r8
        mov r10, QWORD PTR [r8]
        ; A[6] x A[2]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx rbx, rax
        adox r10, rcx
        ; No store %rbx - %r9
        mov r11, QWORD PTR [r8+8]
        ; A[6] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r10, QWORD PTR [r8+16]
        ; A[6] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [r8+8], r11
        mov r11, QWORD PTR [r8+24]
        ; A[6] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+16], r10
        ; Zero into %r8
        ; A[8] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [r8+24], r11
        ; Zero into %r9
        ; A[8] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [r8+32], r10
        ; Carry
        adcx r11, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [r8+40], r11
        ; Diagonal 4
        ; No load %rbx - %r8
        ; A[4] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r9
        mov r11, QWORD PTR [r8]
        ; A[5] x A[3]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rbx, rax
        adox r11, rcx
        ; No store %rbx - %r8
        mov r10, QWORD PTR [r8+8]
        ; A[5] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [r8], r11
        mov r11, QWORD PTR [r8+16]
        ; A[8] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+8], r10
        mov r10, QWORD PTR [r8+24]
        ; A[8] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [r8+16], r11
        mov r11, QWORD PTR [r8+32]
        ; A[7] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r10
        mov r10, QWORD PTR [r8+40]
        ; A[7] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [r8+32], r11
        ; Zero into %r9
        ; A[8] x A[6]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [r8+40], r10
        ; Zero into %r8
        ; A[8] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [r8+48], r11
        ; Carry
        adcx r10, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [r8+56], r10
        mov QWORD PTR [r8+64], r13
        ; Double and Add in A[i] x A[i]
        mov r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx rcx, rax, rdx
        mov QWORD PTR [rbp], rax
        adox r11, r11
        adcx r11, rcx
        mov QWORD PTR [rbp+8], r11
        mov r10, QWORD PTR [rbp+16]
        mov r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+16], r10
        mov QWORD PTR [rbp+24], r11
        ; A[2] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, rdx
        adox r14, r14
        adox r15, r15
        adcx r14, rax
        adcx r15, rcx
        ; A[3] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, rdx
        adox rdi, rdi
        adox rsi, rsi
        adcx rdi, rax
        adcx rsi, rcx
        mov r11, QWORD PTR [r8]
        ; A[4] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, rdx
        adox rbx, rbx
        adox r11, r11
        adcx rbx, rax
        adcx r11, rcx
        mov QWORD PTR [r8], r11
        mov r10, QWORD PTR [r8+8]
        mov r11, QWORD PTR [r8+16]
        ; A[5] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+8], r10
        mov QWORD PTR [r8+16], r11
        mov r10, QWORD PTR [r8+24]
        mov r11, QWORD PTR [r8+32]
        ; A[6] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+40]
        mov r11, QWORD PTR [r8+48]
        ; A[7] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+40], r10
        mov QWORD PTR [r8+48], r11
        mov r10, QWORD PTR [r8+56]
        mov r11, QWORD PTR [r8+64]
        ; A[8] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+56], r10
        mov QWORD PTR [r8+64], r11
        ; a is no longer read: write the register-held result words 4..8
        ; (r8 still points at r + 72, hence the negative offsets)
        mov QWORD PTR [r8+-40], r14
        mov QWORD PTR [r8+-32], r15
        mov QWORD PTR [r8+-24], rdi
        mov QWORD PTR [r8+-16], rsi
        mov QWORD PTR [r8+-8], rbx
        ; restore r8 = r; when a == r, words 0..3 were staged on the stack
        ; (rbp == rsp) and are copied to r as two 16-byte moves
        sub r8, 72
        cmp r9, r8
        jne L_end_521_sqr_avx2_9
        vmovdqu xmm0, OWORD PTR [rbp]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+16]
        vmovups OWORD PTR [r8+16], xmm0
L_end_521_sqr_avx2_9:
        add rsp, 72
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_521_sqr_avx2_9 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
; sp_521_add_9 reads a through rdx and b through r8 and writes r through
; rcx. rax is zeroed before the first add and receives the carry out of
; word 8 through the final "adc rax, 0". The mov instructions placed
; between the add/adc steps do not modify flags, so the carry chain runs
; unbroken from word 0 to word 8. r9 and r10 alternate as the working word.
_text SEGMENT READONLY PARA
sp_521_add_9 PROC
        ; Add
        mov r9, QWORD PTR [rdx]
        xor rax, rax
        add r9, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r9
        adc r10, QWORD PTR [r8+8]
        mov r9, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r10
        adc r9, QWORD PTR [r8+16]
        mov r10, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r9
        adc r10, QWORD PTR [r8+24]
        mov r9, QWORD PTR [rdx+32]
        mov QWORD PTR [rcx+24], r10
        adc r9, QWORD PTR [r8+32]
        mov r10, QWORD PTR [rdx+40]
        mov QWORD PTR [rcx+32], r9
        adc r10, QWORD PTR [r8+40]
        mov r9, QWORD PTR [rdx+48]
        mov QWORD PTR [rcx+40], r10
        adc r9, QWORD PTR [r8+48]
        mov r10, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+48], r9
        adc r10, QWORD PTR [r8+56]
        mov r9, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+56], r10
        adc r9, QWORD PTR [r8+64]
        mov QWORD PTR [rcx+64], r9
        ; rax = carry out of the top word (0 or 1)
        adc rax, 0
        ret
sp_521_add_9 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
; sp_521_sub_9: nine-word subtraction with borrow propagation.
;   rcx = r (destination), rdx = a (minuend), r8 = b (subtrahend)
;   rax = 0 when no borrow leaves word 8, all ones when one does
; r10 and r11 take turns as the working word. The next word of a is always
; loaded before the previous difference is stored, and neither mov touches
; the flags, so the borrow runs unbroken from word 0 to word 8.
_text SEGMENT READONLY PARA
sp_521_sub_9 PROC
        ; word 0 opens the borrow chain with a plain sub
        mov r10, QWORD PTR [rdx]
        sub r10, QWORD PTR [r8]
        ; word 1
        mov r11, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r10
        sbb r11, QWORD PTR [r8+8]
        ; word 2
        mov r10, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r11
        sbb r10, QWORD PTR [r8+16]
        ; word 3
        mov r11, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r10
        sbb r11, QWORD PTR [r8+24]
        ; word 4
        mov r10, QWORD PTR [rdx+32]
        mov QWORD PTR [rcx+24], r11
        sbb r10, QWORD PTR [r8+32]
        ; word 5
        mov r11, QWORD PTR [rdx+40]
        mov QWORD PTR [rcx+32], r10
        sbb r11, QWORD PTR [r8+40]
        ; word 6
        mov r10, QWORD PTR [rdx+48]
        mov QWORD PTR [rcx+40], r11
        sbb r10, QWORD PTR [r8+48]
        ; word 7
        mov r11, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+48], r10
        sbb r11, QWORD PTR [r8+56]
        ; word 8
        mov r10, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+56], r11
        sbb r10, QWORD PTR [r8+64]
        mov QWORD PTR [rcx+64], r10
        ; turn the final borrow flag into the 0 / -1 return value
        sbb rax, rax
        ret
sp_521_sub_9 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 to copy and 0 when not.
; *
; * r A single precision number to copy over.
; * a A single precision number to copy.
; * m Mask value to apply.
; */
; sp_521_cond_copy_9: r[i] ^= (r[i] ^ a[i]) & m for the nine 64-bit words.
;   rcx = r (updated in place), rdx = a (read only), r8 = m
; With m all ones every word of r becomes the matching word of a; with
; m zero r is left unchanged. The same loads, stores and logic operations
; run for either mask value; there is no branch on m.
; Only volatile registers (rax, r9, r10, r11) are used and rsp is never
; moved, so this is a leaf function in the Microsoft x64 sense and needs
; no unwind data. The words are processed in groups of 4, 4 and 1.
_text SEGMENT READONLY PARA
sp_521_cond_copy_9 PROC
        ; words 0..3
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        mov r10, QWORD PTR [rcx+16]
        mov r11, QWORD PTR [rcx+24]
        xor rax, QWORD PTR [rdx]
        xor r9, QWORD PTR [rdx+8]
        xor r10, QWORD PTR [rdx+16]
        xor r11, QWORD PTR [rdx+24]
        and rax, r8
        and r9, r8
        and r10, r8
        and r11, r8
        xor QWORD PTR [rcx], rax
        xor QWORD PTR [rcx+8], r9
        xor QWORD PTR [rcx+16], r10
        xor QWORD PTR [rcx+24], r11
        ; words 4..7
        mov rax, QWORD PTR [rcx+32]
        mov r9, QWORD PTR [rcx+40]
        mov r10, QWORD PTR [rcx+48]
        mov r11, QWORD PTR [rcx+56]
        xor rax, QWORD PTR [rdx+32]
        xor r9, QWORD PTR [rdx+40]
        xor r10, QWORD PTR [rdx+48]
        xor r11, QWORD PTR [rdx+56]
        and rax, r8
        and r9, r8
        and r10, r8
        and r11, r8
        xor QWORD PTR [rcx+32], rax
        xor QWORD PTR [rcx+40], r9
        xor QWORD PTR [rcx+48], r10
        xor QWORD PTR [rcx+56], r11
        ; word 8
        mov rax, QWORD PTR [rcx+64]
        xor rax, QWORD PTR [rdx+64]
        and rax, r8
        xor QWORD PTR [rcx+64], rax
        ret
sp_521_cond_copy_9 ENDP
_text ENDS
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; * m Modulus (prime).
; * mp Montgomery multiplier.
; */
; Win64 register use: rcx = r, rdx = a, r8 = b.  The m argument (r9) is
; overwritten at entry without being read and no stack argument is loaded, so
; m and mp are ignored: the reduction at the end is hard-wired to fold every
; bit from 521 upwards back onto the low 521 bits.
;
; The 9x9 limb product is built one output column at a time into 144 bytes of
; stack.  r13/r14/r15 rotate through the roles of low/middle/high column
; accumulator; after a column's low limb is stored, the middle limb becomes
; the next column's low limb.

; Add the partial product A[ai] * B[bi] into the accumulator acc2:acc1:acc0.
; Expects r9 = a and r8 = b.  Clobbers rax, rdx and flags.
MMUL9_ACC MACRO ai, bi, acc0, acc1, acc2
        mov     rax, QWORD PTR [r8+8*bi]
        mul     QWORD PTR [r9+8*ai]
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
ENDM

; Same as MMUL9_ACC for the first product of a column: acc2 is cleared after
; the multiply and before the additions.
MMUL9_ACC0 MACRO ai, bi, acc0, acc1, acc2
        mov     rax, QWORD PTR [r8+8*bi]
        mul     QWORD PTR [r9+8*ai]
        xor     acc2, acc2
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
ENDM

_text SEGMENT READONLY PARA
sp_521_mont_mul_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r9, rdx                         ; r9 = a (rdx is needed by mul)
        sub     rsp, 144                        ; 18-limb product buffer
        ; column 0: A[0] * B[0]
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r9]
        xor     r15, r15
        mov     QWORD PTR [rsp], rax
        mov     r14, rdx
        ; column 1
        MMUL9_ACC0 0, 1, r14, r15, r13
        MMUL9_ACC  1, 0, r14, r15, r13
        mov     QWORD PTR [rsp+8], r14
        ; column 2
        MMUL9_ACC0 0, 2, r15, r13, r14
        MMUL9_ACC  1, 1, r15, r13, r14
        MMUL9_ACC  2, 0, r15, r13, r14
        mov     QWORD PTR [rsp+16], r15
        ; column 3
        MMUL9_ACC0 0, 3, r13, r14, r15
        MMUL9_ACC  1, 2, r13, r14, r15
        MMUL9_ACC  2, 1, r13, r14, r15
        MMUL9_ACC  3, 0, r13, r14, r15
        mov     QWORD PTR [rsp+24], r13
        ; column 4
        MMUL9_ACC0 0, 4, r14, r15, r13
        MMUL9_ACC  1, 3, r14, r15, r13
        MMUL9_ACC  2, 2, r14, r15, r13
        MMUL9_ACC  3, 1, r14, r15, r13
        MMUL9_ACC  4, 0, r14, r15, r13
        mov     QWORD PTR [rsp+32], r14
        ; column 5
        MMUL9_ACC0 0, 5, r15, r13, r14
        MMUL9_ACC  1, 4, r15, r13, r14
        MMUL9_ACC  2, 3, r15, r13, r14
        MMUL9_ACC  3, 2, r15, r13, r14
        MMUL9_ACC  4, 1, r15, r13, r14
        MMUL9_ACC  5, 0, r15, r13, r14
        mov     QWORD PTR [rsp+40], r15
        ; column 6
        MMUL9_ACC0 0, 6, r13, r14, r15
        MMUL9_ACC  1, 5, r13, r14, r15
        MMUL9_ACC  2, 4, r13, r14, r15
        MMUL9_ACC  3, 3, r13, r14, r15
        MMUL9_ACC  4, 2, r13, r14, r15
        MMUL9_ACC  5, 1, r13, r14, r15
        MMUL9_ACC  6, 0, r13, r14, r15
        mov     QWORD PTR [rsp+48], r13
        ; column 7
        MMUL9_ACC0 0, 7, r14, r15, r13
        MMUL9_ACC  1, 6, r14, r15, r13
        MMUL9_ACC  2, 5, r14, r15, r13
        MMUL9_ACC  3, 4, r14, r15, r13
        MMUL9_ACC  4, 3, r14, r15, r13
        MMUL9_ACC  5, 2, r14, r15, r13
        MMUL9_ACC  6, 1, r14, r15, r13
        MMUL9_ACC  7, 0, r14, r15, r13
        mov     QWORD PTR [rsp+56], r14
        ; column 8
        MMUL9_ACC0 0, 8, r15, r13, r14
        MMUL9_ACC  1, 7, r15, r13, r14
        MMUL9_ACC  2, 6, r15, r13, r14
        MMUL9_ACC  3, 5, r15, r13, r14
        MMUL9_ACC  4, 4, r15, r13, r14
        MMUL9_ACC  5, 3, r15, r13, r14
        MMUL9_ACC  6, 2, r15, r13, r14
        MMUL9_ACC  7, 1, r15, r13, r14
        MMUL9_ACC  8, 0, r15, r13, r14
        mov     QWORD PTR [rsp+64], r15
        ; column 9
        MMUL9_ACC0 1, 8, r13, r14, r15
        MMUL9_ACC  2, 7, r13, r14, r15
        MMUL9_ACC  3, 6, r13, r14, r15
        MMUL9_ACC  4, 5, r13, r14, r15
        MMUL9_ACC  5, 4, r13, r14, r15
        MMUL9_ACC  6, 3, r13, r14, r15
        MMUL9_ACC  7, 2, r13, r14, r15
        MMUL9_ACC  8, 1, r13, r14, r15
        mov     QWORD PTR [rsp+72], r13
        ; column 10
        MMUL9_ACC0 2, 8, r14, r15, r13
        MMUL9_ACC  3, 7, r14, r15, r13
        MMUL9_ACC  4, 6, r14, r15, r13
        MMUL9_ACC  5, 5, r14, r15, r13
        MMUL9_ACC  6, 4, r14, r15, r13
        MMUL9_ACC  7, 3, r14, r15, r13
        MMUL9_ACC  8, 2, r14, r15, r13
        mov     QWORD PTR [rsp+80], r14
        ; column 11
        MMUL9_ACC0 3, 8, r15, r13, r14
        MMUL9_ACC  4, 7, r15, r13, r14
        MMUL9_ACC  5, 6, r15, r13, r14
        MMUL9_ACC  6, 5, r15, r13, r14
        MMUL9_ACC  7, 4, r15, r13, r14
        MMUL9_ACC  8, 3, r15, r13, r14
        mov     QWORD PTR [rsp+88], r15
        ; column 12
        MMUL9_ACC0 4, 8, r13, r14, r15
        MMUL9_ACC  5, 7, r13, r14, r15
        MMUL9_ACC  6, 6, r13, r14, r15
        MMUL9_ACC  7, 5, r13, r14, r15
        MMUL9_ACC  8, 4, r13, r14, r15
        mov     QWORD PTR [rsp+96], r13
        ; column 13
        MMUL9_ACC0 5, 8, r14, r15, r13
        MMUL9_ACC  6, 7, r14, r15, r13
        MMUL9_ACC  7, 6, r14, r15, r13
        MMUL9_ACC  8, 5, r14, r15, r13
        mov     QWORD PTR [rsp+104], r14
        ; column 14
        MMUL9_ACC0 6, 8, r15, r13, r14
        MMUL9_ACC  7, 7, r15, r13, r14
        MMUL9_ACC  8, 6, r15, r13, r14
        mov     QWORD PTR [rsp+112], r15
        ; column 15
        MMUL9_ACC0 7, 8, r13, r14, r15
        MMUL9_ACC  8, 7, r13, r14, r15
        mov     QWORD PTR [rsp+120], r13
        ; column 16: A[8] * B[8], no third accumulator limb is kept
        mov     rax, QWORD PTR [r8+64]
        mul     QWORD PTR [r9+64]
        add     r14, rax
        adc     r15, rdx
        mov     QWORD PTR [rsp+128], r14
        mov     QWORD PTR [rsp+136], r15
        ; Reduction.  Load product limbs 8..16; r12 keeps the low 9 bits of
        ; limb 8, which are bits 512..520 of the product.
        mov     rax, QWORD PTR [rsp+64]
        mov     rdx, QWORD PTR [rsp+72]
        mov     r13, QWORD PTR [rsp+80]
        mov     r12, rax
        and     r12, 511
        mov     r14, QWORD PTR [rsp+88]
        mov     r15, QWORD PTR [rsp+96]
        mov     r8, QWORD PTR [rsp+104]
        mov     r9, QWORD PTR [rsp+112]
        mov     r10, QWORD PTR [rsp+120]
        mov     r11, QWORD PTR [rsp+128]
        ; shift limbs 8..16 right by 9: rax..r11 = product bits 521 and up
        shrd    rax, rdx, 9
        shrd    rdx, r13, 9
        shrd    r13, r14, 9
        shrd    r14, r15, 9
        shrd    r15, r8, 9
        shrd    r8, r9, 9
        shrd    r9, r10, 9
        shrd    r10, r11, 9
        shr     r11, 9
        ; add the high part onto the low 521 bits
        add     rax, QWORD PTR [rsp]
        adc     rdx, QWORD PTR [rsp+8]
        adc     r13, QWORD PTR [rsp+16]
        adc     r14, QWORD PTR [rsp+24]
        adc     r15, QWORD PTR [rsp+32]
        adc     r8, QWORD PTR [rsp+40]
        adc     r9, QWORD PTR [rsp+48]
        adc     r10, QWORD PTR [rsp+56]
        adc     r12, r11
        ; second fold: whatever spilled past bit 9 of the top limb goes back
        ; in at the bottom
        mov     r11, r12
        shr     r12, 9
        and     r11, 511
        add     rax, r12
        FOR     limb, <rdx,r13,r14,r15,r8,r9,r10,r11>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], rdx
        mov     QWORD PTR [rcx+16], r13
        mov     QWORD PTR [rcx+24], r14
        mov     QWORD PTR [rcx+32], r15
        mov     QWORD PTR [rcx+40], r8
        mov     QWORD PTR [rcx+48], r9
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+64], r11
        add     rsp, 144
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_mul_9 ENDP
_text ENDS
; /* Square the Montgomery form number mod the
; modulus (prime). (r = a * a mod m)
;  *
;  * r   Result of squaring.
;  * a   Number to square in Montgomery form.
;  * m   Modulus (prime).
;  * mp  Montgomery multiplier.
;  */
; Win64 register use: rcx = r, rdx = a.  The m argument (r8) is overwritten at
; entry without being read and no stack argument is loaded, so m and mp are
; ignored: the reduction at the end is hard-wired to fold every bit from 521
; upwards back onto the low 521 bits.
;
; The square is built one output column at a time into 144 bytes of stack,
; with r10/r11/r12 rotating as low/middle/high column accumulator.  Each
; cross product A[i]*A[j] (i < j) is computed once and counted twice.  Short
; columns add it twice directly; columns 5..11 sum the cross products in
; r15:r14:r13, double that sum once, then add it to the column.

; Cross product A[i]*A[j] added twice into acc2:acc1:acc0.
; Expects r8 = a.  Clobbers rax, rdx and flags.
SQR9_X2 MACRO i, j, acc0, acc1, acc2
        mov     rax, QWORD PTR [r8+8*j]
        mul     QWORD PTR [r8+8*i]
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
ENDM

; Same as SQR9_X2 for the first product of a column: acc2 is cleared after
; the multiply.
SQR9_X2_FIRST MACRO i, j, acc0, acc1, acc2
        mov     rax, QWORD PTR [r8+8*j]
        mul     QWORD PTR [r8+8*i]
        xor     acc2, acc2
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
ENDM

; Diagonal term A[i]*A[i] added once into acc2:acc1:acc0.
SQR9_SQ MACRO i, acc0, acc1, acc2
        mov     rax, QWORD PTR [r8+8*i]
        mul     rax
        add     acc0, rax
        adc     acc1, rdx
        adc     acc2, 0
ENDM

; Start a long column: clear the column's new high limb acc2 and start the
; cross-product sum r15:r14:r13 with A[i]*A[j].
SQR9_T_FIRST MACRO i, j, acc2
        mov     rax, QWORD PTR [r8+8*j]
        mul     QWORD PTR [r8+8*i]
        xor     acc2, acc2
        xor     r15, r15
        mov     r13, rax
        mov     r14, rdx
ENDM

; Add cross product A[i]*A[j] to the sum r15:r14:r13.
SQR9_T_ADD MACRO i, j
        mov     rax, QWORD PTR [r8+8*j]
        mul     QWORD PTR [r8+8*i]
        add     r13, rax
        adc     r14, rdx
        adc     r15, 0
ENDM

; Double the cross-product sum.
SQR9_T_DBL MACRO
        add     r13, r13
        adc     r14, r14
        adc     r15, r15
ENDM

; Compute the diagonal term A[i]*A[i], double the cross-product sum, then add
; the diagonal term to it.
SQR9_T_DBL_SQ MACRO i
        mov     rax, QWORD PTR [r8+8*i]
        mul     rax
        add     r13, r13
        adc     r14, r14
        adc     r15, r15
        add     r13, rax
        adc     r14, rdx
        adc     r15, 0
ENDM

; Add the finished sum r15:r14:r13 into the column accumulator.
SQR9_T_FLUSH MACRO acc0, acc1, acc2
        add     acc0, r13
        adc     acc1, r14
        adc     acc2, r15
ENDM

_text SEGMENT READONLY PARA
sp_521_mont_sqr_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r8, rdx                         ; r8 = a (rdx is needed by mul)
        sub     rsp, 144                        ; 18-limb product buffer
        ; column 0: A[0] * A[0]
        mov     rax, QWORD PTR [r8]
        mul     rax
        xor     r12, r12
        mov     QWORD PTR [rsp], rax
        mov     r11, rdx
        ; column 1
        SQR9_X2_FIRST 0, 1, r11, r12, r10
        mov     QWORD PTR [rsp+8], r11
        ; column 2
        SQR9_X2_FIRST 0, 2, r12, r10, r11
        SQR9_SQ       1, r12, r10, r11
        mov     QWORD PTR [rsp+16], r12
        ; column 3
        SQR9_X2_FIRST 0, 3, r10, r11, r12
        SQR9_X2       1, 2, r10, r11, r12
        mov     QWORD PTR [rsp+24], r10
        ; column 4
        SQR9_X2_FIRST 0, 4, r11, r12, r10
        SQR9_X2       1, 3, r11, r12, r10
        SQR9_SQ       2, r11, r12, r10
        mov     QWORD PTR [rsp+32], r11
        ; column 5
        SQR9_T_FIRST  0, 5, r11
        SQR9_T_ADD    1, 4
        SQR9_T_ADD    2, 3
        SQR9_T_DBL
        SQR9_T_FLUSH  r12, r10, r11
        mov     QWORD PTR [rsp+40], r12
        ; column 6
        SQR9_T_FIRST  0, 6, r12
        SQR9_T_ADD    1, 5
        SQR9_T_ADD    2, 4
        SQR9_T_DBL_SQ 3
        SQR9_T_FLUSH  r10, r11, r12
        mov     QWORD PTR [rsp+48], r10
        ; column 7
        SQR9_T_FIRST  0, 7, r10
        SQR9_T_ADD    1, 6
        SQR9_T_ADD    2, 5
        SQR9_T_ADD    3, 4
        SQR9_T_DBL
        SQR9_T_FLUSH  r11, r12, r10
        mov     QWORD PTR [rsp+56], r11
        ; column 8
        SQR9_T_FIRST  0, 8, r11
        SQR9_T_ADD    1, 7
        SQR9_T_ADD    2, 6
        SQR9_T_ADD    3, 5
        SQR9_T_DBL_SQ 4
        SQR9_T_FLUSH  r12, r10, r11
        mov     QWORD PTR [rsp+64], r12
        ; column 9
        SQR9_T_FIRST  1, 8, r12
        SQR9_T_ADD    2, 7
        SQR9_T_ADD    3, 6
        SQR9_T_ADD    4, 5
        SQR9_T_DBL
        SQR9_T_FLUSH  r10, r11, r12
        mov     QWORD PTR [rsp+72], r10
        ; column 10
        SQR9_T_FIRST  2, 8, r10
        SQR9_T_ADD    3, 7
        SQR9_T_ADD    4, 6
        SQR9_T_DBL_SQ 5
        SQR9_T_FLUSH  r11, r12, r10
        mov     QWORD PTR [rsp+80], r11
        ; column 11
        SQR9_T_FIRST  3, 8, r11
        SQR9_T_ADD    4, 7
        SQR9_T_ADD    5, 6
        SQR9_T_DBL
        SQR9_T_FLUSH  r12, r10, r11
        mov     QWORD PTR [rsp+88], r12
        ; column 12
        SQR9_X2_FIRST 4, 8, r10, r11, r12
        SQR9_X2       5, 7, r10, r11, r12
        SQR9_SQ       6, r10, r11, r12
        mov     QWORD PTR [rsp+96], r10
        ; column 13
        SQR9_X2_FIRST 5, 8, r11, r12, r10
        SQR9_X2       6, 7, r11, r12, r10
        mov     QWORD PTR [rsp+104], r11
        ; column 14
        SQR9_X2_FIRST 6, 8, r12, r10, r11
        SQR9_SQ       7, r12, r10, r11
        mov     QWORD PTR [rsp+112], r12
        ; column 15
        SQR9_X2_FIRST 7, 8, r10, r11, r12
        mov     QWORD PTR [rsp+120], r10
        ; column 16: A[8] * A[8], no third accumulator limb is kept
        mov     rax, QWORD PTR [r8+64]
        mul     rax
        add     r11, rax
        adc     r12, rdx
        mov     QWORD PTR [rsp+128], r11
        mov     QWORD PTR [rsp+136], r12
        ; Reduction.  Load product limbs 8..16; r9 keeps the low 9 bits of
        ; limb 8, which are bits 512..520 of the product.
        mov     r10, QWORD PTR [rsp+64]
        mov     r11, QWORD PTR [rsp+72]
        mov     r12, QWORD PTR [rsp+80]
        mov     r9, r10
        and     r9, 511
        mov     rax, QWORD PTR [rsp+88]
        mov     rdx, QWORD PTR [rsp+96]
        mov     r13, QWORD PTR [rsp+104]
        mov     r14, QWORD PTR [rsp+112]
        mov     r15, QWORD PTR [rsp+120]
        mov     r8, QWORD PTR [rsp+128]
        ; shift limbs 8..16 right by 9: r10..r8 = product bits 521 and up
        shrd    r10, r11, 9
        shrd    r11, r12, 9
        shrd    r12, rax, 9
        shrd    rax, rdx, 9
        shrd    rdx, r13, 9
        shrd    r13, r14, 9
        shrd    r14, r15, 9
        shrd    r15, r8, 9
        shr     r8, 9
        ; add the high part onto the low 521 bits
        add     r10, QWORD PTR [rsp]
        adc     r11, QWORD PTR [rsp+8]
        adc     r12, QWORD PTR [rsp+16]
        adc     rax, QWORD PTR [rsp+24]
        adc     rdx, QWORD PTR [rsp+32]
        adc     r13, QWORD PTR [rsp+40]
        adc     r14, QWORD PTR [rsp+48]
        adc     r15, QWORD PTR [rsp+56]
        adc     r9, r8
        ; second fold: whatever spilled past bit 9 of the top limb goes back
        ; in at the bottom
        mov     r8, r9
        shr     r9, 9
        and     r8, 511
        add     r10, r9
        FOR     limb, <r11,r12,rax,rdx,r13,r14,r15,r8>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], r10
        mov     QWORD PTR [rcx+8], r11
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], rax
        mov     QWORD PTR [rcx+32], rdx
        mov     QWORD PTR [rcx+40], r13
        mov     QWORD PTR [rcx+48], r14
        mov     QWORD PTR [rcx+56], r15
        mov     QWORD PTR [rcx+64], r8
        add     rsp, 144
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_sqr_9 ENDP
_text ENDS
; /* Compare a with b in constant time.
;  *
;  * a   A single precision integer.
;  * b   A single precision integer.
;  * return -ve, 0 or +ve if a is less than, equal to or greater than b
;  * respectively.
; */
; Win64 register use: rcx = a, rdx = b, result in rax.
; Limbs are compared from most to least significant with no branches.  r8 is
; an all-ones mask until the first differing limb is seen and zero after it;
; both limbs are ANDed with it, so later limbs compare equal and cannot change
; the verdict.  rax starts at -1; the closing "xor rax, r8" turns that into 0
; when no limb differed (r8 still -1) and leaves +1 / -1 untouched otherwise.
_text SEGMENT READONLY PARA
sp_521_cmp_9 PROC
        push    r12
        xor     r9, r9                          ; constant 0
        mov     r8, -1                          ; "still equal" mask
        mov     rax, -1
        mov     r10, 1                          ; constant +1
        FOR     disp, <64,56,48,40,32,24,16,8,0>
        mov     r11, QWORD PTR [rcx+disp]
        mov     r12, QWORD PTR [rdx+disp]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10                        ; a limb above b limb: +1
        cmovc   rax, r8                         ; borrow: take the mask (-1 here)
        cmovnz  r8, r9                          ; limbs differ: clear the mask
        ENDM
        xor     rax, r8
        pop     r12
        ret
sp_521_cmp_9 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not subtracting.
;  *
;  * r   A single precision number representing condition subtract result.
;  * a   A single precision number to subtract from.
;  * b   A single precision number to subtract.
;  * m   Mask value to apply.
; */
; Win64 register use: rcx = r, rdx = a, r8 = b, r9 = m.
; Returns rax = -1 if the subtraction borrowed out of the top limb, else 0.
; (b & m) is first written to a 72-byte stack buffer; r8 is then reused as a
; scratch register for the subtraction chain.

; One step of the borrow chain: cur = a[disp/8] - masked_b[disp/8] - borrow,
; then store the previous limb's result.  mov does not modify flags, so the
; borrow survives the interleaved store.
CSUB9_STEP MACRO cur, prev, disp
        mov     cur, QWORD PTR [rdx+disp]
        mov     r8, QWORD PTR [rsp+disp]
        sbb     cur, r8
        mov     QWORD PTR [rcx+disp-8], prev
ENDM

_text SEGMENT READONLY PARA
sp_521_cond_sub_9 PROC
        sub     rsp, 72
        ; masked copy of b, two limbs at a time
        FOR     disp, <0,16,32,48>
        mov     r10, QWORD PTR [r8+disp]
        mov     r11, QWORD PTR [r8+disp+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+disp], r10
        mov     QWORD PTR [rsp+disp+8], r11
        ENDM
        mov     r10, QWORD PTR [r8+64]
        and     r10, r9
        mov     QWORD PTR [rsp+64], r10
        ; r = a - (b & m)
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
        CSUB9_STEP r11, r10, 8
        CSUB9_STEP r10, r11, 16
        CSUB9_STEP r11, r10, 24
        CSUB9_STEP r10, r11, 32
        CSUB9_STEP r11, r10, 40
        CSUB9_STEP r10, r11, 48
        CSUB9_STEP r11, r10, 56
        CSUB9_STEP r10, r11, 64
        mov     QWORD PTR [rcx+64], r10
        sbb     rax, rax                        ; rax = 0 - final borrow
        add     rsp, 72
        ret
sp_521_cond_sub_9 ENDP
_text ENDS
; /* Reduce the number back to 521 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
; */
; Win64 register use: rcx = a (limbs 0..16 are read, limbs 0..8 are written).
; rdx (m) and r8 (mp) are overwritten before being read, so this routine does
; not use them: it shifts bits 521 and up of a down by 521 and adds them to the
; low 521 bits, then repeats the fold once for the overflow of that addition.
_text SEGMENT READONLY PARA
sp_521_mont_reduce_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        ; limbs 8..16; r15 keeps the low 9 bits of limb 8 (bits 512..520)
        mov     rdx, QWORD PTR [rcx+64]
        mov     rax, QWORD PTR [rcx+72]
        mov     r8, QWORD PTR [rcx+80]
        mov     r15, rdx
        and     r15, 511
        mov     r9, QWORD PTR [rcx+88]
        mov     r10, QWORD PTR [rcx+96]
        mov     r11, QWORD PTR [rcx+104]
        mov     r12, QWORD PTR [rcx+112]
        mov     r13, QWORD PTR [rcx+120]
        mov     r14, QWORD PTR [rcx+128]
        ; shift limbs 8..16 right by 9: rdx..r14 = bits 521 and up
        shrd    rdx, rax, 9
        shrd    rax, r8, 9
        shrd    r8, r9, 9
        shrd    r9, r10, 9
        shrd    r10, r11, 9
        shrd    r11, r12, 9
        shrd    r12, r13, 9
        shrd    r13, r14, 9
        shr     r14, 9
        ; add the high part onto the low 521 bits
        add     rdx, QWORD PTR [rcx]
        adc     rax, QWORD PTR [rcx+8]
        adc     r8, QWORD PTR [rcx+16]
        adc     r9, QWORD PTR [rcx+24]
        adc     r10, QWORD PTR [rcx+32]
        adc     r11, QWORD PTR [rcx+40]
        adc     r12, QWORD PTR [rcx+48]
        adc     r13, QWORD PTR [rcx+56]
        adc     r15, r14
        ; second fold of whatever spilled past bit 9 of the top limb
        mov     r14, r15
        shr     r15, 9
        and     r14, 511
        add     rdx, r15
        FOR     limb, <rax,r8,r9,r10,r11,r12,r13,r14>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rdx
        mov     QWORD PTR [rcx+8], rax
        mov     QWORD PTR [rcx+16], r8
        mov     QWORD PTR [rcx+24], r9
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        mov     QWORD PTR [rcx+48], r12
        mov     QWORD PTR [rcx+56], r13
        mov     QWORD PTR [rcx+64], r14
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_reduce_9 ENDP
_text ENDS
; /* Reduce the number back to 521 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
; */
; Win64 register use: rcx = a (reduced in place), rdx = m, r8 = mp.
;
; Nine word-wise Montgomery steps are run; each adds mu * m to the window
; a[i..i+9], where mu = a[i] * mp (masked to 9 bits on the last step).
; Afterwards limbs 8..17 are shifted right by 9 bits into limbs 0..9 and m is
; conditionally subtracted through sp_521_cond_sub_9.
;
; Registers inside the loop:
;   r9  = m              r8  = mp            r10 = remaining iterations
;   r13 = mu             r11/r12 = alternating carry limb
;   r15 = current a[i+0], rdi = current a[i+1]; both are carried to the next
;         iteration in registers and only written back after the loop
;   rsi = carry out of a[i+9], consumed by the next iteration
;
; The call at the end is made without Win64 shadow space and with rsp at
; 8 mod 16 (six pushes after entry).  The callee, sp_521_cond_sub_9 in this
; file, is a leaf routine that only does QWORD accesses inside its own
; 72-byte frame, so it needs neither.
_text SEGMENT READONLY PARA
sp_521_mont_reduce_order_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx                         ; r9 = m (rdx is needed by mul)
        xor     rsi, rsi
        ; i = 9
        mov     r10, 9
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_521_mont_reduce_order_9_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        cmp     r10, 1
        jne     L_521_mont_reduce_order_9_nomask
        ; last iteration only: keep 9 bits of mu
        and     r13, 511
L_521_mont_reduce_order_9_nomask:
        ; a[i+0] += m[0] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        mov     QWORD PTR [rcx], r15
        adc     r12, rdx
        ; a[i+1] += m[1] * mu   (result stays in r15 as the next a[i+0])
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu   (result stays in rdi as the next a[i+1])
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+24]
        mov     r14, QWORD PTR [rcx+24]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+24], r14
        adc     r11, 0
        ; a[i+4] += m[4] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+32]
        mov     r14, QWORD PTR [rcx+32]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+32], r14
        adc     r12, 0
        ; a[i+5] += m[5] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+40]
        mov     r14, QWORD PTR [rcx+40]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+40], r14
        adc     r11, 0
        ; a[i+6] += m[6] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+48]
        mov     r14, QWORD PTR [rcx+48]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+48], r14
        adc     r12, 0
        ; a[i+7] += m[7] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+56]
        mov     r14, QWORD PTR [rcx+56]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+56], r14
        adc     r11, 0
        ; a[i+8] += m[8] * mu, and a[i+9] += high limb + carry from last round
        mov     rax, r13
        mul     QWORD PTR [r9+64]
        mov     r14, QWORD PTR [rcx+64]
        add     r11, rax
        adc     rdx, rsi
        mov     rsi, 0                          ; mov, not xor: CF must survive
        adc     rsi, 0
        add     r14, r11
        mov     QWORD PTR [rcx+64], r14
        adc     QWORD PTR [rcx+72], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_521_mont_reduce_order_9_loop
        ; write back the two limbs that were carried in registers
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; rcx was advanced by 72 bytes: r8 -> limb 8 (source), rcx -> limb 0
        mov     r8, rcx
        sub     rcx, 72
        sub     r8, 8
        ; limbs 8..17 shifted right by 9 bits -> limbs 0..9
        mov     rax, QWORD PTR [r8]
        mov     rdx, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [r8+24]
        mov     r13, QWORD PTR [r8+32]
        shrd    rax, rdx, 9
        shrd    rdx, r10, 9
        shrd    r10, r11, 9
        shrd    r11, r13, 9
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], rdx
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     rdx, QWORD PTR [r8+40]
        mov     r10, QWORD PTR [r8+48]
        mov     r11, QWORD PTR [r8+56]
        mov     rax, QWORD PTR [r8+64]
        shrd    r13, rdx, 9
        shrd    rdx, r10, 9
        shrd    r10, r11, 9
        shrd    r11, rax, 9
        mov     QWORD PTR [rcx+32], r13
        mov     QWORD PTR [rcx+40], rdx
        mov     QWORD PTR [rcx+48], r10
        mov     QWORD PTR [rcx+56], r11
        mov     rdx, QWORD PTR [r8+72]
        shrd    rax, rdx, 9
        shr     rdx, 9
        mov     QWORD PTR [rcx+64], rax
        mov     QWORD PTR [rcx+72], rdx
        ; mask = 0 - (limb 8 >> 9): nonzero when the result reaches bit 521
        mov     rsi, QWORD PTR [rcx+64]
        shr     rsi, 9
        neg     rsi
        ; sp_521_cond_sub_9(r = a, a = a, b = m, m = mask).
        ; r8 must take m from r9 BEFORE r9 is overwritten with the mask.  The
        ; former "IFDEF _WIN64 ... ELSE" wrapper had an ELSE arm that did the
        ; two moves in the opposite order and so passed the mask as b; this
        ; file always defines _WIN64, so only this sequence was ever assembled.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        call    sp_521_cond_sub_9
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_reduce_order_9 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
;  *
;  * r   Result of addition.
;  * a   First number to add in Montgomery form.
;  * b   Second number to add in Montgomery form.
;  * m   Modulus (prime).
; */
; Win64 register use: rcx = r, rdx = a, r8 = b.  The m argument (r9) is
; overwritten before being read, so it is not used: the sum is reduced by
; adding (top limb >> 9) back in at the bottom and keeping 9 bits of the top
; limb.  The result is not compared against the modulus afterwards.
_text SEGMENT READONLY PARA
sp_521_mont_add_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; rdi:r15:...:r9:rax = a
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        mov     r14, QWORD PTR [rdx+48]
        mov     r15, QWORD PTR [rdx+56]
        mov     rdi, QWORD PTR [rdx+64]
        ; += b
        add     rax, QWORD PTR [r8]
        adc     r9, QWORD PTR [r8+8]
        adc     r10, QWORD PTR [r8+16]
        adc     r11, QWORD PTR [r8+24]
        adc     r12, QWORD PTR [r8+32]
        adc     r13, QWORD PTR [r8+40]
        adc     r14, QWORD PTR [r8+48]
        adc     r15, QWORD PTR [r8+56]
        adc     rdi, QWORD PTR [r8+64]
        ; fold bits 521 and up back into the bottom
        mov     rsi, rdi
        and     rdi, 511
        shr     rsi, 9
        add     rax, rsi
        FOR     limb, <r9,r10,r11,r12,r13,r14,r15,rdi>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        mov     QWORD PTR [rcx+48], r14
        mov     QWORD PTR [rcx+56], r15
        mov     QWORD PTR [rcx+64], rdi
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_add_9 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
;  *
;  * r   Result of doubling.
;  * a   Number to double in Montgomery form.
;  * m   Modulus (prime).
; */
; Win64 register use: rcx = r, rdx = a.  The m argument (r8) is overwritten
; before being read, so it is not used: the doubled value is reduced by adding
; (top limb >> 9) back in at the bottom and keeping 9 bits of the top limb.
; The result is not compared against the modulus afterwards.
_text SEGMENT READONLY PARA
sp_521_mont_dbl_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; r15:r14:...:r8:rax = a
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+48]
        mov     r14, QWORD PTR [rdx+56]
        mov     r15, QWORD PTR [rdx+64]
        ; a + a, register to register
        add     rax, rax
        FOR     limb, <r8,r9,r10,r11,r12,r13,r14,r15>
        adc     limb, limb
        ENDM
        ; fold bits 521 and up back into the bottom
        mov     rdi, r15
        and     r15, 511
        shr     rdi, 9
        add     rax, rdi
        FOR     limb, <r8,r9,r10,r11,r12,r13,r14,r15>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        mov     QWORD PTR [rcx+48], r13
        mov     QWORD PTR [rcx+56], r14
        mov     QWORD PTR [rcx+64], r15
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_dbl_9 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
;  *
;  * r   Result of Tripling.
;  * a   Number to triple in Montgomery form.
;  * m   Modulus (prime).
; */
; Win64 register use: rcx = r, rdx = a.  The m argument (r8) is overwritten
; before being read, so it is not used.  a is doubled in registers, a is added
; once more from memory, and only then is the sum folded: (top limb >> 9) is
; added back in at the bottom and 9 bits of the top limb are kept.  The result
; is not compared against the modulus afterwards.
_text SEGMENT READONLY PARA
sp_521_mont_tpl_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; r15:r14:...:r8:rax = a
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+48]
        mov     r14, QWORD PTR [rdx+56]
        mov     r15, QWORD PTR [rdx+64]
        ; 2a
        add     rax, rax
        FOR     limb, <r8,r9,r10,r11,r12,r13,r14,r15>
        adc     limb, limb
        ENDM
        ; 2a + a
        add     rax, QWORD PTR [rdx]
        adc     r8, QWORD PTR [rdx+8]
        adc     r9, QWORD PTR [rdx+16]
        adc     r10, QWORD PTR [rdx+24]
        adc     r11, QWORD PTR [rdx+32]
        adc     r12, QWORD PTR [rdx+40]
        adc     r13, QWORD PTR [rdx+48]
        adc     r14, QWORD PTR [rdx+56]
        adc     r15, QWORD PTR [rdx+64]
        ; fold bits 521 and up back into the bottom
        mov     rdi, r15
        and     r15, 511
        shr     rdi, 9
        add     rax, rdi
        FOR     limb, <r8,r9,r10,r11,r12,r13,r14,r15>
        adc     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        mov     QWORD PTR [rcx+48], r13
        mov     QWORD PTR [rcx+56], r14
        mov     QWORD PTR [rcx+64], r15
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_tpl_9 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
;  *
;  * r   Result of subtraction.
;  * a   Number to subtract from in Montgomery form.
;  * b   Number to subtract in Montgomery form.
;  * m   Modulus (prime).
; */
; Win64 register use: rcx = r, rdx = a, r8 = b.  The m argument (r9) is
; overwritten before being read, so it is not used.  After a - b the top limb
; is arithmetically shifted right by 9 and negated; that value is subtracted
; from the bottom and the top limb is cut to 9 bits.
; NOTE(review): presumably both inputs fit in 521 bits, so the correction is
; 1 when a - b went negative and 0 otherwise - confirm against callers.
_text SEGMENT READONLY PARA
sp_521_mont_sub_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; rdi:r15:...:r9:rax = a
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        mov     r14, QWORD PTR [rdx+48]
        mov     r15, QWORD PTR [rdx+56]
        mov     rdi, QWORD PTR [rdx+64]
        ; -= b
        sub     rax, QWORD PTR [r8]
        sbb     r9, QWORD PTR [r8+8]
        sbb     r10, QWORD PTR [r8+16]
        sbb     r11, QWORD PTR [r8+24]
        sbb     r12, QWORD PTR [r8+32]
        sbb     r13, QWORD PTR [r8+40]
        sbb     r14, QWORD PTR [r8+48]
        sbb     r15, QWORD PTR [r8+56]
        sbb     rdi, QWORD PTR [r8+64]
        ; rsi = -(signed top limb >> 9); rdi keeps its low 9 bits
        mov     rsi, rdi
        and     rdi, 511
        sar     rsi, 9
        neg     rsi
        sub     rax, rsi
        FOR     limb, <r9,r10,r11,r12,r13,r14,r15,rdi>
        sbb     limb, 0
        ENDM
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        mov     QWORD PTR [rcx+48], r14
        mov     QWORD PTR [rcx+56], r15
        mov     QWORD PTR [rcx+64], rdi
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_sub_9 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
;  *
;  * r   Result of division by 2.
;  * a   Number to divide.
;  * m   Modulus (prime).
; */
; Win64 register use: rcx = r, rdx = a.  The m argument (r8) is overwritten
; before being read, so it is not used.  With odd = a & 1 the routine computes
; (a - odd + (odd << 521)) >> 1, i.e. it adds 2^521 - 1 to an odd input before
; halving, and does so without a branch.
_text SEGMENT READONLY PARA
sp_521_mont_div2_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; r15:r14:...:r8:rax = a
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+48]
        mov     r14, QWORD PTR [rdx+56]
        mov     r15, QWORD PTR [rdx+64]
        ; rdi = low bit of a; subtract it across all limbs
        mov     rdi, rax
        and     rdi, 1
        sub     rax, rdi
        FOR     limb, <r8,r9,r10,r11,r12,r13,r14,r15>
        sbb     limb, 0
        ENDM
        ; add the low bit back at bit 9 of the top limb (bit 521 overall)
        shl     rdi, 9
        add     r15, rdi
        ; shift the whole value right by one bit
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, r13, 1
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shr     r15, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        mov     QWORD PTR [rcx+48], r13
        mov     QWORD PTR [rcx+56], r14
        mov     QWORD PTR [rcx+64], r15
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_div2_9 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of point to retrieve.
; */
; Win64 register use: rcx = r, rdx = table, r8d = idx.
;
; Table entries are 440 bytes apart.  Three 72-byte fields are copied, at byte
; offsets 0, 144 and 288 of an entry.  Entry 0 is skipped and entries 1..32
; are all read; each is ANDed with an all-ones/zero mask (ones only when its
; number equals idx) and ORed into the result, so the memory access pattern
; does not depend on idx.  If idx is outside 1..32 the output fields are zero.
;
; The table is walked twice because only six XMM accumulators are used:
;   pass 1: bytes 0..71 and 144..175
;   pass 2: bytes 176..215 and 288..359
;
; Only the low 32 bits of idx are used, for the vector selector and the
; scalar selector alike.  The scalar compare used to test all 64 bits of r8,
; which could disagree with the vector compare because the Win64 convention
; leaves the upper half of a register undefined for a 32-bit argument.
; NOTE(review): assumes idx is declared as a 32-bit integer - confirm against
; the C prototype.
;
; xmm6-xmm15 are callee-saved on Win64 and are spilled to a 160-byte frame.
_text SEGMENT READONLY PARA
sp_521_get_point_33_9 PROC
        push    r12
        push    r13
        push    r14
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; pass 1 setup: r14 = scalar entry counter, xmm14 = vector entry
        ; counter, xmm15 = increment (1 in every dword), xmm13 = idx broadcast
        mov     r14, 1
        mov     rax, 1
        movd    xmm13, r8d
        add     rdx, 440                        ; skip entry 0
        movd    xmm15, eax
        mov     rax, 32                         ; entries to visit
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm14, xmm14
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        xor     r12, r12
        xor     r13, r13
        movdqa  xmm14, xmm15
L_521_get_point_33_9_start_1:
        ; xmm12 = vector mask, r9 = scalar mask for this entry
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor     r9, r9
        cmp     r8d, r14d
        sete    r9b
        neg     r9
        inc     r14
        movdqu  xmm6, OWORD PTR [rdx]
        movdqu  xmm7, OWORD PTR [rdx+16]
        movdqu  xmm8, OWORD PTR [rdx+32]
        movdqu  xmm9, OWORD PTR [rdx+48]
        mov     r10, QWORD PTR [rdx+64]
        movdqu  xmm10, OWORD PTR [rdx+144]
        movdqu  xmm11, OWORD PTR [rdx+160]
        add     rdx, 440
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        and     r10, r9
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        or      r12, r10
        dec     rax
        jnz     L_521_get_point_33_9_start_1
        movdqu  OWORD PTR [rcx], xmm0
        movdqu  OWORD PTR [rcx+16], xmm1
        movdqu  OWORD PTR [rcx+32], xmm2
        movdqu  OWORD PTR [rcx+48], xmm3
        mov     QWORD PTR [rcx+64], r12
        movdqu  OWORD PTR [rcx+144], xmm4
        movdqu  OWORD PTR [rcx+160], xmm5
        ; pass 2 setup: same as pass 1, with rdx rewound to entry 1
        mov     r14, 1
        mov     rax, 1
        movd    xmm13, r8d
        sub     rdx, 14080                      ; 32 entries * 440 bytes
        movd    xmm15, eax
        mov     rax, 32
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm14, xmm14
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        xor     r12, r12
        xor     r13, r13
        movdqa  xmm14, xmm15
L_521_get_point_33_9_start_2:
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor     r9, r9
        cmp     r8d, r14d
        sete    r9b
        neg     r9
        inc     r14
        movdqu  xmm6, OWORD PTR [rdx+176]
        movdqu  xmm7, OWORD PTR [rdx+192]
        mov     r10, QWORD PTR [rdx+208]
        movdqu  xmm8, OWORD PTR [rdx+288]
        movdqu  xmm9, OWORD PTR [rdx+304]
        movdqu  xmm10, OWORD PTR [rdx+320]
        movdqu  xmm11, OWORD PTR [rdx+336]
        mov     r11, QWORD PTR [rdx+352]
        add     rdx, 440
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        and     r10, r9
        and     r11, r9
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        or      r12, r10
        or      r13, r11
        dec     rax
        jnz     L_521_get_point_33_9_start_2
        movdqu  OWORD PTR [rcx+176], xmm0
        movdqu  OWORD PTR [rcx+192], xmm1
        mov     QWORD PTR [rcx+208], r12
        movdqu  OWORD PTR [rcx+288], xmm2
        movdqu  OWORD PTR [rcx+304], xmm3
        movdqu  OWORD PTR [rcx+320], xmm4
        movdqu  OWORD PTR [rcx+336], xmm5
        mov     QWORD PTR [rcx+352], r13
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_get_point_33_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of point to retrieve.
; */
; Win64 register use in sp_521_get_point_33_avx2_9:
;   rcx  = r      destination point
;   rdx  = table  first table entry; entries are 440 bytes apart
;   r8d  = idx    32-bit index of the entry to copy
;
; Single pass over entries 1..32.  Each entry is read in full, masked and
; ORed into the accumulators, so the access pattern does not depend on idx.
; Entry 0 is never read: an idx of 0 (or above 32) stores zeros.  Three
; 9-word fields at entry offsets 0, 144 and 288 are copied: 64 bytes of each
; go through ymm registers, the 9th word through r10/r11/r12.
;
; xmm6-xmm15 (callee-saved on Win64) are spilled to a 160-byte frame.
_text SEGMENT READONLY PARA
sp_521_get_point_33_avx2_9 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        sub rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        ; rdi / ymm14 = current entry number (scalar / 8 x 32-bit lanes)
        ; ymm13 = idx broadcast, ymm15 = 1 broadcast, rax = entries left
        mov rdi, 1
        mov rax, 1
        movd xmm13, r8d
        ; Skip entry 0
        add rdx, 440
        movd xmm15, eax
        mov rax, 32
        ; vpermd with an all-zero index vector broadcasts lane 0
        vpxor ymm14, ymm14, ymm14
        vpermd ymm13, ymm14, ymm13
        vpermd ymm15, ymm14, ymm15
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        vpxor ymm2, ymm2, ymm2
        vpxor ymm3, ymm3, ymm3
        vpxor ymm4, ymm4, ymm4
        vpxor ymm5, ymm5, ymm5
        xor r10, r10
        xor r11, r11
        xor r12, r12
        vmovdqa ymm14, ymm15
L_521_get_point_33_avx2_9_start:
        ; ymm12 = SIMD mask: all ones when entry number == idx
        vpcmpeqd ymm12, ymm14, ymm13
        vpaddd ymm14, ymm14, ymm15
        ; r9 = scalar mask for the 9th words.  Compare only the low 32 bits:
        ; idx is a 32-bit argument, so the upper half of r8 is undefined and
        ; a 64-bit compare could disagree with the SIMD mask above.
        xor r9, r9
        cmp r8d, edi
        sete r9b
        neg r9
        inc rdi
        vmovupd ymm6, YMMWORD PTR [rdx]
        vmovupd ymm7, YMMWORD PTR [rdx+32]
        vmovupd ymm8, YMMWORD PTR [rdx+144]
        vmovupd ymm9, YMMWORD PTR [rdx+176]
        vmovupd ymm10, YMMWORD PTR [rdx+288]
        vmovupd ymm11, YMMWORD PTR [rdx+320]
        mov r13, QWORD PTR [rdx+64]
        mov r14, QWORD PTR [rdx+208]
        mov r15, QWORD PTR [rdx+352]
        add rdx, 440
        vpand ymm6, ymm6, ymm12
        vpand ymm7, ymm7, ymm12
        vpand ymm8, ymm8, ymm12
        vpand ymm9, ymm9, ymm12
        vpand ymm10, ymm10, ymm12
        vpand ymm11, ymm11, ymm12
        and r13, r9
        and r14, r9
        and r15, r9
        vpor ymm0, ymm0, ymm6
        vpor ymm1, ymm1, ymm7
        vpor ymm2, ymm2, ymm8
        vpor ymm3, ymm3, ymm9
        vpor ymm4, ymm4, ymm10
        vpor ymm5, ymm5, ymm11
        or r10, r13
        or r11, r14
        or r12, r15
        dec rax
        jnz L_521_get_point_33_avx2_9_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+32], ymm1
        vmovupd YMMWORD PTR [rcx+144], ymm2
        vmovupd YMMWORD PTR [rcx+176], ymm3
        vmovupd YMMWORD PTR [rcx+288], ymm4
        vmovupd YMMWORD PTR [rcx+320], ymm5
        mov QWORD PTR [rcx+64], r10
        mov QWORD PTR [rcx+208], r11
        mov QWORD PTR [rcx+352], r12
        ; Clear the dirty upper ymm halves so legacy-SSE code in the caller
        ; does not pay AVX/SSE transition penalties.  Only the low 128 bits
        ; of xmm6-xmm15 are callee-saved; they are reloaded just below.
        vzeroupper
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_521_get_point_33_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
;  * (r = a * b mod m)
;  *
;  * r   Result of multiplication.
;  * a   First number to multiply in Montgomery form.
;  * b   Second number to multiply in Montgomery form.
;  * m   Modulus (prime).
;  * mp  Montgomery multiplier.
;  */
; Win64 register use in sp_521_mont_mul_avx2_9:
;   rcx = r  (kept in r8), rdx = a  (kept in r9), r8 = b  (kept in rbp)
;   m and mp are never read: the incoming r9 is overwritten and no stack
;   argument is loaded.  The reduction at the end is hard-wired to folding
;   at bit 521 (word 8, bit 9).
;
; The 18-word product is built in a 144-byte scratch area:
;   [rbx+0 .. rbx+64]  words 0..8
;   [rsp+0 .. rsp+64]  words 9..17
; r15 stays zero.  The xor that zeroes it also clears CF and OF, starting the
; adcx/adox carry chains.  r14 carries the top carry from one row to the next.
;
; NOTE(review): between "add rsp, 72" and "sub rsp, 72" the low nine words
; sit below rsp.  Win64 has no red zone, so anything that writes below rsp
; asynchronously could corrupt them - confirm this is acceptable for every
; target this file is built for.
_text SEGMENT READONLY PARA
sp_521_mont_mul_avx2_9 PROC
        push rbx
        push rbp
        push r12
        push r13
        push r14
        push r15
        mov rbp, r8
        mov r8, rcx
        mov r9, rdx
        sub rsp, 144
        mov rbx, rsp
        add rsp, 72
        xor r15, r15
        ; Row 0: t[0..9] = A[0] * B[0..8]
        mov rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx r12, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx], r10
        adcx r11, rax
        ; A[0] * B[2]
        mulx r13, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+8], r11
        adcx r12, rax
        mov QWORD PTR [rbx+16], r12
        ; A[0] * B[3]
        mulx r10, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        ; A[0] * B[4]
        mulx r11, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        ; A[0] * B[5]
        mulx r12, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        mov QWORD PTR [rbx+40], r11
        ; A[0] * B[6]
        mulx r13, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        ; A[0] * B[7]
        mulx r10, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        ; A[0] * B[8]
        mulx r11, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adcx r11, r15
        mov r14, r15
        adcx r14, r15
        mov QWORD PTR [rbx+64], r10
        mov QWORD PTR [rsp], r11
        ; Row 1: t[1..10] += A[1] * B[0..8]
        mov rdx, QWORD PTR [r9+8]
        mov r11, QWORD PTR [rbx+8]
        mov r12, QWORD PTR [rbx+16]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        ; A[1] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+8], r11
        adcx r12, rax
        adox r13, rcx
        ; A[1] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+16], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+24], r13
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        ; A[1] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        ; A[1] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rbx+48], r12
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        ; A[1] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r13, rax
        adox r10, rcx
        ; A[1] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[1] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rbx+64], r10
        mov r12, r15
        adcx r11, rax
        adox r12, rcx
        adcx r12, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp], r11
        mov QWORD PTR [rsp+8], r12
        ; Row 2: t[2..11] += A[2] * B[0..8]
        mov rdx, QWORD PTR [r9+16]
        mov r12, QWORD PTR [rbx+16]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        ; A[2] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r13, rcx
        ; A[2] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+16], r12
        adcx r13, rax
        adox r10, rcx
        ; A[2] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+32], r10
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        ; A[2] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        ; A[2] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+56], r13
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        ; A[2] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[2] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[2] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp], r11
        mov r13, r15
        adcx r12, rax
        adox r13, rcx
        adcx r13, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+8], r12
        mov QWORD PTR [rsp+16], r13
        ; Row 3: t[3..12] += A[3] * B[0..8]
        mov rdx, QWORD PTR [r9+24]
        mov r13, QWORD PTR [rbx+24]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        ; A[3] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r13, rax
        adox r10, rcx
        ; A[3] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+24], r13
        adcx r10, rax
        adox r11, rcx
        ; A[3] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbx+40], r11
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        ; A[3] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r12, rax
        adox r13, rcx
        ; A[3] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        ; A[3] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+64], r10
        mov r12, QWORD PTR [rsp+8]
        mov r13, QWORD PTR [rsp+16]
        ; A[3] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[3] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp], r11
        adcx r12, rax
        adox r13, rcx
        ; A[3] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+8], r12
        mov r10, r15
        adcx r13, rax
        adox r10, rcx
        adcx r10, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+16], r13
        mov QWORD PTR [rsp+24], r10
        ; Row 4: t[4..13] += A[4] * B[0..8]
        mov rdx, QWORD PTR [r9+32]
        mov r10, QWORD PTR [rbx+32]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        ; A[4] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+32], r10
        adcx r11, rax
        adox r12, rcx
        ; A[4] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rbx+48], r12
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        ; A[4] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        adox r10, rcx
        ; A[4] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[4] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rsp], r11
        mov r13, QWORD PTR [rsp+16]
        mov r10, QWORD PTR [rsp+24]
        ; A[4] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r13, rcx
        ; A[4] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp+8], r12
        adcx r13, rax
        adox r10, rcx
        ; A[4] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+16], r13
        mov r11, r15
        adcx r10, rax
        adox r11, rcx
        adcx r11, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+24], r10
        mov QWORD PTR [rsp+32], r11
        ; Row 5: t[5..14] += A[5] * B[0..8]
        mov rdx, QWORD PTR [r9+40]
        mov r11, QWORD PTR [rbx+40]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        ; A[5] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+40], r11
        adcx r12, rax
        adox r13, rcx
        ; A[5] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rbx+56], r13
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        mov r13, QWORD PTR [rsp+16]
        ; A[5] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[5] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rsp], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rsp+8], r12
        mov r10, QWORD PTR [rsp+24]
        mov r11, QWORD PTR [rsp+32]
        ; A[5] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r13, rax
        adox r10, rcx
        ; A[5] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp+16], r13
        adcx r10, rax
        adox r11, rcx
        ; A[5] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+24], r10
        mov r12, r15
        adcx r11, rax
        adox r12, rcx
        adcx r12, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+32], r11
        mov QWORD PTR [rsp+40], r12
        ; Row 6: t[6..15] += A[6] * B[0..8]
        mov rdx, QWORD PTR [r9+48]
        mov r12, QWORD PTR [rbx+48]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        ; A[6] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r12, rax
        adox r13, rcx
        ; A[6] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+48], r12
        adcx r13, rax
        adox r10, rcx
        ; A[6] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbx+64], r10
        mov r12, QWORD PTR [rsp+8]
        mov r13, QWORD PTR [rsp+16]
        mov r10, QWORD PTR [rsp+24]
        ; A[6] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rsp], r11
        adcx r12, rax
        adox r13, rcx
        ; A[6] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rsp+8], r12
        adcx r13, rax
        adox r10, rcx
        mov QWORD PTR [rsp+16], r13
        mov r11, QWORD PTR [rsp+32]
        mov r12, QWORD PTR [rsp+40]
        ; A[6] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r10, rax
        adox r11, rcx
        ; A[6] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp+24], r10
        adcx r11, rax
        adox r12, rcx
        ; A[6] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+32], r11
        mov r13, r15
        adcx r12, rax
        adox r13, rcx
        adcx r13, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+40], r12
        mov QWORD PTR [rsp+48], r13
        ; Row 7: t[7..16] += A[7] * B[0..8]
        mov rdx, QWORD PTR [r9+56]
        mov r13, QWORD PTR [rbx+56]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        ; A[7] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r13, rax
        adox r10, rcx
        ; A[7] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+56], r13
        adcx r10, rax
        adox r11, rcx
        ; A[7] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rsp], r11
        mov r13, QWORD PTR [rsp+16]
        mov r10, QWORD PTR [rsp+24]
        mov r11, QWORD PTR [rsp+32]
        ; A[7] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r12, rax
        adox r13, rcx
        ; A[7] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rsp+8], r12
        adcx r13, rax
        adox r10, rcx
        ; A[7] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rsp+16], r13
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rsp+24], r10
        mov r12, QWORD PTR [rsp+40]
        mov r13, QWORD PTR [rsp+48]
        ; A[7] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r11, rax
        adox r12, rcx
        ; A[7] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp+32], r11
        adcx r12, rax
        adox r13, rcx
        ; A[7] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+40], r12
        mov r10, r15
        adcx r13, rax
        adox r10, rcx
        adcx r10, r14
        mov r14, r15
        adox r14, r15
        adcx r14, r15
        mov QWORD PTR [rsp+48], r13
        mov QWORD PTR [rsp+56], r10
        ; Row 8: t[8..17] += A[8] * B[0..8]
        mov rdx, QWORD PTR [r9+64]
        mov r10, QWORD PTR [rbx+64]
        mov r11, QWORD PTR [rsp]
        mov r12, QWORD PTR [rsp+8]
        mov r13, QWORD PTR [rsp+16]
        ; A[8] * B[0]
        mulx rcx, rax, QWORD PTR [rbp]
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[1]
        mulx rcx, rax, QWORD PTR [rbp+8]
        mov QWORD PTR [rbx+64], r10
        adcx r11, rax
        adox r12, rcx
        ; A[8] * B[2]
        mulx rcx, rax, QWORD PTR [rbp+16]
        mov QWORD PTR [rsp], r11
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [rsp+8], r12
        mov r10, QWORD PTR [rsp+24]
        mov r11, QWORD PTR [rsp+32]
        mov r12, QWORD PTR [rsp+40]
        ; A[8] * B[3]
        mulx rcx, rax, QWORD PTR [rbp+24]
        adcx r13, rax
        adox r10, rcx
        ; A[8] * B[4]
        mulx rcx, rax, QWORD PTR [rbp+32]
        mov QWORD PTR [rsp+16], r13
        adcx r10, rax
        adox r11, rcx
        ; A[8] * B[5]
        mulx rcx, rax, QWORD PTR [rbp+40]
        mov QWORD PTR [rsp+24], r10
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rsp+32], r11
        mov r13, QWORD PTR [rsp+48]
        mov r10, QWORD PTR [rsp+56]
        ; A[8] * B[6]
        mulx rcx, rax, QWORD PTR [rbp+48]
        adcx r12, rax
        adox r13, rcx
        ; A[8] * B[7]
        mulx rcx, rax, QWORD PTR [rbp+56]
        mov QWORD PTR [rsp+40], r12
        adcx r13, rax
        adox r10, rcx
        ; A[8] * B[8]
        mulx rcx, rax, QWORD PTR [rbp+64]
        mov QWORD PTR [rsp+48], r13
        mov r11, r15
        adcx r10, rax
        adox r11, rcx
        adcx r11, r14
        mov QWORD PTR [rsp+56], r10
        mov QWORD PTR [rsp+64], r11
        ; Reduce: split the product at bit 521 (word 8, bit 9).
        ; rax,rcx,r10..r14,rbx,rdx = t[8..16]; r15 = low 9 bits of t[8].
        mov rax, QWORD PTR [rsp+-8]
        mov rcx, QWORD PTR [rsp]
        mov r10, QWORD PTR [rsp+8]
        mov r15, rax
        and r15, 511
        mov r11, QWORD PTR [rsp+16]
        mov r12, QWORD PTR [rsp+24]
        mov r13, QWORD PTR [rsp+32]
        mov r14, QWORD PTR [rsp+40]
        mov rbx, QWORD PTR [rsp+48]
        mov rdx, QWORD PTR [rsp+56]
        ; rsp back to the base of the scratch area (t[0])
        sub rsp, 72
        ; High part = product >> 521
        shrd rax, rcx, 9
        shrd rcx, r10, 9
        shrd r10, r11, 9
        shrd r11, r12, 9
        shrd r12, r13, 9
        shrd r13, r14, 9
        shrd r14, rbx, 9
        shrd rbx, rdx, 9
        shr rdx, 9
        ; High part + low 521 bits
        add rax, QWORD PTR [rsp]
        adc rcx, QWORD PTR [rsp+8]
        adc r10, QWORD PTR [rsp+16]
        adc r11, QWORD PTR [rsp+24]
        adc r12, QWORD PTR [rsp+32]
        adc r13, QWORD PTR [rsp+40]
        adc r14, QWORD PTR [rsp+48]
        adc rbx, QWORD PTR [rsp+56]
        adc r15, rdx
        ; Fold whatever overflowed past bit 521 back into the bottom
        mov rdx, r15
        shr r15, 9
        and rdx, 511
        add rax, r15
        adc rcx, 0
        adc r10, 0
        adc r11, 0
        adc r12, 0
        adc r13, 0
        adc r14, 0
        adc rbx, 0
        adc rdx, 0
        mov QWORD PTR [r8], rax
        mov QWORD PTR [r8+8], rcx
        mov QWORD PTR [r8+16], r10
        mov QWORD PTR [r8+24], r11
        mov QWORD PTR [r8+32], r12
        mov QWORD PTR [r8+40], r13
        mov QWORD PTR [r8+48], r14
        mov QWORD PTR [r8+56], rbx
        mov QWORD PTR [r8+64], rdx
        add rsp, 144
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        pop rbx
        ret
sp_521_mont_mul_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
;  *
;  * r   Result of squaring.
;  * a   Number to square in Montgomery form.
;  * m   Modulus (prime).
;  * mp  Montgomery multiplier.
; */
; Win64 register use in sp_521_mont_sqr_avx2_9:
;   rcx = r  (kept in r8), rdx = a  (kept in r9)
;   The incoming r8/r9 arguments are overwritten without being read and no
;   stack argument is loaded, so m and mp are not used; the reduction at the
;   end is hard-wired to folding at bit 521 (word 8, bit 9).
;
; Method: accumulate the off-diagonal products A[i] x A[j] (i > j) once,
; then double them while adding the squares A[i] x A[i], then reduce.
; The 18-word result t[] is split between registers and a 144-byte scratch:
;   [rbp+0 .. rbp+24]   t[0..3]
;   r14,r15,rdi,rsi,rbx t[4..8]   (spilled to [rsp-40 .. rsp-8] at the end)
;   [rsp+0 .. rsp+64]   t[9..17]
; r12 stays zero (its xor also clears CF and OF); r13 carries the top carry
; from one diagonal pass to the next.
;
; NOTE(review): the %rN names in the "Zero into" / "No load" / "No store"
; generator notes below do not appear to match the registers used in this
; listing (r8/r9 are the r and a pointers here).
; NOTE(review): between "add rsp, 72" and "sub rsp, 72" the low half of the
; scratch area sits below rsp; Win64 has no red zone - confirm acceptable.
_text SEGMENT READONLY PARA
sp_521_mont_sqr_avx2_9 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov r8, rcx
        mov r9, rdx
        sub rsp, 144
        mov rbp, rsp
        add rsp, 72
        xor r12, r12
        ; Diagonal 1
        ; Zero into %r9
        ; A[1] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx r11, r10, QWORD PTR [r9+8]
        mov QWORD PTR [rbp+8], r10
        ; Zero into %r8
        ; A[2] x A[0]
        mulx r10, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [rbp+16], r11
        ; No load %r12 - %r9
        ; A[3] x A[0]
        mulx r14, rax, QWORD PTR [r9+24]
        adcx r10, rax
        adox r14, r12
        mov QWORD PTR [rbp+24], r10
        ; No load %r13 - %r8
        ; A[4] x A[0]
        mulx r15, rax, QWORD PTR [r9+32]
        adcx r14, rax
        adox r15, r12
        ; No store %r12 - %r9
        ; No load %r14 - %r9
        ; A[5] x A[0]
        mulx rdi, rax, QWORD PTR [r9+40]
        adcx r15, rax
        adox rdi, r12
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[6] x A[0]
        mulx rsi, rax, QWORD PTR [r9+48]
        adcx rdi, rax
        adox rsi, r12
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[7] x A[0]
        mulx rbx, rax, QWORD PTR [r9+56]
        adcx rsi, rax
        adox rbx, r12
        ; No store %r15 - %r8
        ; Zero into %r8
        ; A[8] x A[0]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx rbx, rax
        adox r10, r12
        ; No store %rbx - %r9
        ; Zero into %r9
        ; A[8] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [rsp], r10
        ; Carry
        adcx r11, r12
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [rsp+8], r11
        ; Diagonal 2
        mov r11, QWORD PTR [rbp+24]
        ; No load %r12 - %r8
        ; A[2] x A[1]
        mulx rcx, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r14, rcx
        mov QWORD PTR [rbp+24], r11
        ; No load %r13 - %r9
        ; A[3] x A[1]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r14, rax
        adox r15, rcx
        ; No store %r12 - %r8
        ; No load %r14 - %r8
        ; A[4] x A[1]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r15, rax
        adox rdi, rcx
        ; No store %r13 - %r9
        ; No load %r15 - %r9
        ; A[5] x A[1]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rdi, rax
        adox rsi, rcx
        ; No store %r14 - %r8
        ; No load %rbx - %r8
        ; A[6] x A[1]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r9
        mov r11, QWORD PTR [rsp]
        ; A[7] x A[1]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx rbx, rax
        adox r11, rcx
        ; No store %rbx - %r8
        mov r10, QWORD PTR [rsp+8]
        ; A[7] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [rsp], r11
        ; Zero into %r9
        ; A[7] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx r11, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [rsp+8], r10
        ; Zero into %r8
        ; A[7] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r10, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [rsp+16], r11
        ; Carry
        adcx r10, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [rsp+24], r10
        ; Diagonal 3
        ; No load %r14 - %r9
        ; A[3] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r15, rax
        adox rdi, rcx
        ; No store %r13 - %r8
        ; No load %r15 - %r8
        ; A[4] x A[2]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx rdi, rax
        adox rsi, rcx
        ; No store %r14 - %r9
        ; No load %rbx - %r9
        ; A[5] x A[2]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r8
        mov r10, QWORD PTR [rsp]
        ; A[6] x A[2]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx rbx, rax
        adox r10, rcx
        ; No store %rbx - %r9
        mov r11, QWORD PTR [rsp+8]
        ; A[6] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rsp], r10
        mov r10, QWORD PTR [rsp+16]
        ; A[6] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [rsp+8], r11
        mov r11, QWORD PTR [rsp+24]
        ; A[6] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rsp+16], r10
        ; Zero into %r8
        ; A[8] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [rsp+24], r11
        ; Zero into %r9
        ; A[8] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [rsp+32], r10
        ; Carry
        adcx r11, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [rsp+40], r11
        ; Diagonal 4
        ; No load %rbx - %r8
        ; A[4] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx rsi, rax
        adox rbx, rcx
        ; No store %r15 - %r9
        mov r11, QWORD PTR [rsp]
        ; A[5] x A[3]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx rbx, rax
        adox r11, rcx
        ; No store %rbx - %r8
        mov r10, QWORD PTR [rsp+8]
        ; A[5] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [rsp], r11
        mov r11, QWORD PTR [rsp+16]
        ; A[8] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rsp+8], r10
        mov r10, QWORD PTR [rsp+24]
        ; A[8] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [rsp+16], r11
        mov r11, QWORD PTR [rsp+32]
        ; A[7] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rsp+24], r10
        mov r10, QWORD PTR [rsp+40]
        ; A[7] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r10, rcx
        mov QWORD PTR [rsp+32], r11
        ; Zero into %r9
        ; A[8] x A[6]
        mulx r11, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, r12
        mov QWORD PTR [rsp+40], r10
        ; Zero into %r8
        ; A[8] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx r10, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r10, r12
        mov QWORD PTR [rsp+48], r11
        ; Carry
        adcx r10, r13
        mov r13, r12
        adcx r13, r12
        adox r13, r12
        mov QWORD PTR [rsp+56], r10
        mov QWORD PTR [rsp+64], r13
        ; Double and Add in A[i] x A[i]
        ; (adox doubles each word through the OF chain, adcx adds the square
        ; through the CF chain)
        mov r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx rcx, rax, rdx
        mov QWORD PTR [rbp], rax
        adox r11, r11
        adcx r11, rcx
        mov QWORD PTR [rbp+8], r11
        mov r10, QWORD PTR [rbp+16]
        mov r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+16], r10
        mov QWORD PTR [rbp+24], r11
        ; A[2] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, rdx
        adox r14, r14
        adox r15, r15
        adcx r14, rax
        adcx r15, rcx
        ; A[3] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, rdx
        adox rdi, rdi
        adox rsi, rsi
        adcx rdi, rax
        adcx rsi, rcx
        mov r11, QWORD PTR [rsp]
        ; A[4] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, rdx
        adox rbx, rbx
        adox r11, r11
        adcx rbx, rax
        adcx r11, rcx
        mov QWORD PTR [rsp], r11
        mov r10, QWORD PTR [rsp+8]
        mov r11, QWORD PTR [rsp+16]
        ; A[5] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rsp+8], r10
        mov QWORD PTR [rsp+16], r11
        mov r10, QWORD PTR [rsp+24]
        mov r11, QWORD PTR [rsp+32]
        ; A[6] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rsp+24], r10
        mov QWORD PTR [rsp+32], r11
        mov r10, QWORD PTR [rsp+40]
        mov r11, QWORD PTR [rsp+48]
        ; A[7] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rsp+40], r10
        mov QWORD PTR [rsp+48], r11
        mov r10, QWORD PTR [rsp+56]
        mov r11, QWORD PTR [rsp+64]
        ; A[8] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rsp+56], r10
        mov QWORD PTR [rsp+64], r11
        ; Spill the register-held words t[4..8] next to t[0..3]
        mov QWORD PTR [rsp+-40], r14
        mov QWORD PTR [rsp+-32], r15
        mov QWORD PTR [rsp+-24], rdi
        mov QWORD PTR [rsp+-16], rsi
        mov QWORD PTR [rsp+-8], rbx
        ; Reduce: split the square at bit 521 (word 8, bit 9).
        ; r10,r11,r14,r15,rdi,rsi,rbx,rdx,rax = t[8..16]; rcx = t[8] & 511
        mov r10, QWORD PTR [rsp+-8]
        mov r11, QWORD PTR [rsp]
        mov r14, QWORD PTR [rsp+8]
        mov rcx, r10
        and rcx, 511
        mov r15, QWORD PTR [rsp+16]
        mov rdi, QWORD PTR [rsp+24]
        mov rsi, QWORD PTR [rsp+32]
        mov rbx, QWORD PTR [rsp+40]
        mov rdx, QWORD PTR [rsp+48]
        mov rax, QWORD PTR [rsp+56]
        ; rsp back to the base of the scratch area (t[0])
        sub rsp, 72
        ; High part = square >> 521
        shrd r10, r11, 9
        shrd r11, r14, 9
        shrd r14, r15, 9
        shrd r15, rdi, 9
        shrd rdi, rsi, 9
        shrd rsi, rbx, 9
        shrd rbx, rdx, 9
        shrd rdx, rax, 9
        shr rax, 9
        ; High part + low 521 bits
        add r10, QWORD PTR [rsp]
        adc r11, QWORD PTR [rsp+8]
        adc r14, QWORD PTR [rsp+16]
        adc r15, QWORD PTR [rsp+24]
        adc rdi, QWORD PTR [rsp+32]
        adc rsi, QWORD PTR [rsp+40]
        adc rbx, QWORD PTR [rsp+48]
        adc rdx, QWORD PTR [rsp+56]
        adc rcx, rax
        ; Fold whatever overflowed past bit 521 back into the bottom
        mov rax, rcx
        shr rcx, 9
        and rax, 511
        add r10, rcx
        adc r11, 0
        adc r14, 0
        adc r15, 0
        adc rdi, 0
        adc rsi, 0
        adc rbx, 0
        adc rdx, 0
        adc rax, 0
        mov QWORD PTR [r8], r10
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r14
        mov QWORD PTR [r8+24], r15
        mov QWORD PTR [r8+32], rdi
        mov QWORD PTR [r8+40], rsi
        mov QWORD PTR [r8+48], rbx
        mov QWORD PTR [r8+56], rdx
        mov QWORD PTR [r8+64], rax
        add rsp, 144
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_521_mont_sqr_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not copying.
;  *
;  * r  A single precision number representing condition subtract result.
;  * a  A single precision number to subtract from.
;  * b  A single precision number to subtract.
;  * m  Mask value to apply.
;  */
; Win64 register use: rcx = r, rdx = a, r8 = b, r9 = m.
; pext with an all-ones mask returns the word unchanged and with a zero mask
; returns 0, so each word of b is either subtracted or replaced by zero
; without a branch.  The mov/pext between the sub/sbb steps do not touch CF,
; so the borrow chain runs through all nine words.
; Returns rax = 0 when there is no final borrow, -1 when there is.
_text SEGMENT READONLY PARA
sp_521_cond_sub_avx2_9 PROC
        push r12
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx]
        pext r12, r12, r9
        sub r10, r12
        mov r12, QWORD PTR [r8+8]
        mov r11, QWORD PTR [rdx+8]
        pext r12, r12, r9
        mov QWORD PTR [rcx], r10
        sbb r11, r12
        mov r10, QWORD PTR [r8+16]
        mov r12, QWORD PTR [rdx+16]
        pext r10, r10, r9
        mov QWORD PTR [rcx+8], r11
        sbb r12, r10
        mov r11, QWORD PTR [r8+24]
        mov r10, QWORD PTR [rdx+24]
        pext r11, r11, r9
        mov QWORD PTR [rcx+16], r12
        sbb r10, r11
        mov r12, QWORD PTR [r8+32]
        mov r11, QWORD PTR [rdx+32]
        pext r12, r12, r9
        mov QWORD PTR [rcx+24], r10
        sbb r11, r12
        mov r10, QWORD PTR [r8+40]
        mov r12, QWORD PTR [rdx+40]
        pext r10, r10, r9
        mov QWORD PTR [rcx+32], r11
        sbb r12, r10
        mov r11, QWORD PTR [r8+48]
        mov r10, QWORD PTR [rdx+48]
        pext r11, r11, r9
        mov QWORD PTR [rcx+40], r12
        sbb r10, r11
        mov r12, QWORD PTR [r8+56]
        mov r11, QWORD PTR [rdx+56]
        pext r12, r12, r9
        mov QWORD PTR [rcx+48], r10
        sbb r11, r12
        mov r10, QWORD PTR [r8+64]
        mov r12, QWORD PTR [rdx+64]
        pext r10, r10, r9
        mov QWORD PTR [rcx+56], r11
        sbb r12, r10
        mov QWORD PTR [rcx+64], r12
        ; rax = -borrow
        sbb rax, rax
        pop r12
        ret
sp_521_cond_sub_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 521 bits using Montgomery reduction.
;  *
;  * a   A single precision number to reduce in place.
;  * m   The single precision number representing the modulus.
;  * mp  The digit representing the negative inverse of m mod 2^n.
;  */
; Win64 register use: rcx = a (kept in r9), rdx = m (kept in r10), r8 = mp.
;
; r14, r15, rdi, rsi hold a sliding window of the four lowest live words of
; a; r9 runs 32 bytes ahead of the current word a[i].  rbx is re-zeroed by
; "xor rbx, rbx" before every step (which also clears CF and OF for the
; adcx/adox chains) and rbp carries the top carry into the next step.
; The loop body handles two words per pass and runs four times (r11 = 8,
; decremented by 2); the ninth word is handled after the loop with mu
; masked to 9 bits.  Afterwards a[8..17] is shifted right by 9 bits into
; a[0..9] and m is conditionally subtracted.
_text SEGMENT READONLY PARA
sp_521_mont_reduce_order_avx2_9 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        mov r9, rcx
        mov r10, rdx
        xor rbp, rbp
        ; i = 9
        mov r11, 8
        mov r14, QWORD PTR [r9]
        mov r15, QWORD PTR [r9+8]
        mov rdi, QWORD PTR [r9+16]
        mov rsi, QWORD PTR [r9+24]
        add r9, 32
        xor rbp, rbp
L_521_mont_reduce_order_avx2_9_loop:
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        mov QWORD PTR [r9+-32], r12
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+8]
        adcx rsi, rax
        adox r13, rcx
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+8], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        ; a[i+9] += carry from the previous step; rbp = new top carry
        adcx r13, rbp
        mov rbp, rbx
        mov QWORD PTR [r9+40], r13
        adox rbp, rbx
        adcx rbp, rbx
        ; mu = a[i] * mp
        mov rdx, r14
        mov r13, r14
        imul rdx, r8
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r13, rax
        adox r14, rcx
        mov QWORD PTR [r9+-24], r13
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9+8]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r12, QWORD PTR [r9+16]
        adcx rsi, rax
        adox r12, rcx
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r12, QWORD PTR [r9+48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+40], r13
        adcx r12, rbp
        mov rbp, rbx
        mov QWORD PTR [r9+48], r12
        adox rbp, rbx
        adcx rbp, rbx
        ; a += 2
        add r9, 16
        ; i -= 2
        sub r11, 2
        jnz L_521_mont_reduce_order_avx2_9_loop
        ; Ninth word: only the low 9 bits of mu are used
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        and rdx, 511
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        mov QWORD PTR [r9+-32], r12
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+8]
        adcx rsi, rax
        adox r13, rcx
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+8], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        adcx r13, rbp
        mov rbp, rbx
        mov QWORD PTR [r9+40], r13
        adox rbp, rbx
        ; a += 1
        add r9, 8
        ; Flush the register window back to memory
        mov QWORD PTR [r9+-32], r14
        mov QWORD PTR [r9+-24], r15
        mov QWORD PTR [r9+-16], rdi
        mov QWORD PTR [r9+-8], rsi
        ; r8 = source, 64 bytes above r9 = destination (9 words reduced)
        sub r9, 32
        lea r8, QWORD PTR [r9+-8]
        sub r9, 72
        ; Shift the ten words at r8 right by 9 bits into r9[0..9]
        mov r12, QWORD PTR [r8]
        mov r14, QWORD PTR [r8+8]
        mov r15, QWORD PTR [r8+16]
        mov rdi, QWORD PTR [r8+24]
        mov r13, QWORD PTR [r8+32]
        shrd r12, r14, 9
        shrd r14, r15, 9
        shrd r15, rdi, 9
        shrd rdi, r13, 9
        mov QWORD PTR [r9], r12
        mov QWORD PTR [r9+8], r14
        mov QWORD PTR [r9+16], r15
        mov QWORD PTR [r9+24], rdi
        mov r14, QWORD PTR [r8+40]
        mov r15, QWORD PTR [r8+48]
        mov rdi, QWORD PTR [r8+56]
        mov r12, QWORD PTR [r8+64]
        shrd r13, r14, 9
        shrd r14, r15, 9
        shrd r15, rdi, 9
        shrd rdi, r12, 9
        mov QWORD PTR [r9+32], r13
        mov QWORD PTR [r9+40], r14
        mov QWORD PTR [r9+48], r15
        mov QWORD PTR [r9+56], rdi
        mov r14, QWORD PTR [r8+72]
        shrd r12, r14, 9
        shr r14, 9
        mov QWORD PTR [r9+64], r12
        mov QWORD PTR [r9+72], r14
        ; rbp = -(r9[8] >> 9): subtraction mask taken from the bits at and
        ; above bit 521 of the shifted value
        mov rbp, QWORD PTR [r9+64]
        shr rbp, 9
        neg rbp
        ; r9[0..8] -= m[0..8] masked by rbp (pext with 0 yields 0)
        mov rcx, QWORD PTR [r10]
        mov rdx, QWORD PTR [r9]
        pext rcx, rcx, rbp
        sub rdx, rcx
        mov rcx, QWORD PTR [r10+8]
        mov rax, QWORD PTR [r9+8]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+16]
        mov rcx, QWORD PTR [r9+16]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+8], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+24]
        mov rdx, QWORD PTR [r9+24]
        pext rax, rax, rbp
        mov QWORD PTR [r9+16], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+32]
        mov rax, QWORD PTR [r9+32]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+24], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+40]
        mov rcx, QWORD PTR [r9+40]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+32], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+48]
        mov rdx, QWORD PTR [r9+48]
        pext rax, rax, rbp
        mov QWORD PTR [r9+40], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+56]
        mov rax, QWORD PTR [r9+56]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+48], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+64]
        mov rcx, QWORD PTR [r9+64]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+56], rax
        sbb rcx, rdx
        mov QWORD PTR [r9+64], rcx
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_521_mont_reduce_order_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
;  *
;  * r  Result of division by 2.
;  * a  Number to divide.
;  * m  Modulus (prime).
;  */
; Win64 register use: rcx = r, rdx = a.  The incoming r8 (m) is overwritten
; without being read.
; rdi = low bit of a.  When it is set, 1 is subtracted from a and bit 9 of
; word 8 (2^521) is added, i.e. a += 2^521 - 1, which makes the value even;
; when it is clear both adjustments are zero.  The nine words are then
; shifted right by one bit.
_text SEGMENT READONLY PARA
sp_521_mont_div2_avx2_9 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        mov rax, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        mov r11, QWORD PTR [rdx+32]
        mov r12, QWORD PTR [rdx+40]
        mov r13, QWORD PTR [rdx+48]
        mov r14, QWORD PTR [rdx+56]
        mov r15, QWORD PTR [rdx+64]
        ; rdi = a & 1
        mov rdi, rax
        and rdi, 1
        ; a -= rdi
        sub rax, rdi
        sbb r8, 0
        sbb r9, 0
        sbb r10, 0
        sbb r11, 0
        sbb r12, 0
        sbb r13, 0
        sbb r14, 0
        sbb r15, 0
        ; a += rdi << 521
        shl rdi, 9
        add r15, rdi
        ; a >>= 1
        shrd rax, r8, 1
        shrd r8, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        shrd r12, r13, 1
        shrd r13, r14, 1
        shrd r14, r15, 1
        shr r15, 1
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r8
        mov QWORD PTR [rcx+16], r9
        mov QWORD PTR [rcx+24], r10
        mov QWORD PTR [rcx+32], r11
        mov QWORD PTR [rcx+40], r12
        mov QWORD PTR [rcx+48], r13
        mov QWORD PTR [rcx+56], r14
        mov QWORD PTR [rcx+64], r15
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_521_mont_div2_avx2_9 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of entry to retrieve.
;  */
; Win64 registers: rcx = r, rdx = table, r8 = idx.
; Every 144-byte table slot from slot 1 onwards is loaded and ANDed with an
; all-ones/all-zeros mask (counter == idx), then ORed into an accumulator, so
; the memory access pattern does not depend on idx. Slot 0 is never loaded;
; idx == 0 therefore yields all-zero output.
; Two passes: pass 0 gathers 72 bytes at slot offset 0 into r[0..71],
; pass 1 gathers 72 bytes at slot offset 72 into r[144..215].
_text SEGMENT READONLY PARA
sp_521_get_entry_64_9 PROC
        push r12
        ; xmm6-xmm15 are callee-saved in the Win64 ABI; all ten are spilled.
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15
        ; From entry 1
        ; r12 = scalar counter, xmm14 = vector counter, xmm15 = vector of 1s,
        ; xmm13 = idx broadcast to all four dwords, rax = iterations left.
        mov r12, 1
        mov rax, 1
        movd xmm13, r8d
        add rdx, 144
        movd xmm15, eax
        mov rax, 63
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        xor r11, r11
        movdqa xmm14, xmm15
L_521_get_entry_64_9_start_0:
        ; xmm12 = vector mask (counter == idx); r9 = same mask as a scalar.
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor r9, r9
        cmp r8, r12
        sete r9b
        neg r9
        inc r12
        ; Load 64 + 8 bytes of this slot, then advance to the next slot.
        movdqu xmm4, OWORD PTR [rdx]
        movdqu xmm5, OWORD PTR [rdx+16]
        movdqu xmm6, OWORD PTR [rdx+32]
        movdqu xmm7, OWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+64]
        add rdx, 144
        pand xmm4, xmm12
        pand xmm5, xmm12
        pand xmm6, xmm12
        pand xmm7, xmm12
        and r10, r9
        por xmm0, xmm4
        por xmm1, xmm5
        por xmm2, xmm6
        por xmm3, xmm7
        or r11, r10
        dec rax
        jnz L_521_get_entry_64_9_start_0
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+32], xmm2
        movdqu OWORD PTR [rcx+48], xmm3
        mov QWORD PTR [rcx+64], r11
        ; From entry 1
        mov r12, 1
        mov rax, 1
        movd xmm13, r8d
        ; Pass 0 advanced rdx by 63 * 144 = 9072; stepping back 9000 leaves
        ; rdx 72 bytes into slot 1, the start of the second 72-byte half.
        sub rdx, 9000
        movd xmm15, eax
        mov rax, 63
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        xor r11, r11
        movdqa xmm14, xmm15
L_521_get_entry_64_9_start_1:
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor r9, r9
        cmp r8, r12
        sete r9b
        neg r9
        inc r12
        movdqu xmm4, OWORD PTR [rdx]
        movdqu xmm5, OWORD PTR [rdx+16]
        movdqu xmm6, OWORD PTR [rdx+32]
        movdqu xmm7, OWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+64]
        add rdx, 144
        pand xmm4, xmm12
        pand xmm5, xmm12
        pand xmm6, xmm12
        pand xmm7, xmm12
        and r10, r9
        por xmm0, xmm4
        por xmm1, xmm5
        por xmm2, xmm6
        por xmm3, xmm7
        or r11, r10
        dec rax
        jnz L_521_get_entry_64_9_start_1
        movdqu OWORD PTR [rcx+144], xmm0
        movdqu OWORD PTR [rcx+160], xmm1
        movdqu OWORD PTR [rcx+176], xmm2
        movdqu OWORD PTR [rcx+192], xmm3
        mov QWORD PTR [rcx+208], r11
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        pop r12
        ret
sp_521_get_entry_64_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of entry to retrieve.
;  *
;  * Win64 registers: rcx = r, rdx = table, r8 = idx.
;  * Single pass: both 72-byte halves of every slot are loaded, masked with
;  * (counter == idx) and ORed into accumulators.
;  * NOTE(review): the loop runs 64 times starting at slot 1, so the last
;  * iteration loads slot 64, whereas the SSE variant above runs 63 times.
;  * Confirm the table has a readable slot 64.
;  */
_text SEGMENT READONLY PARA
sp_521_get_entry_64_avx2_9 PROC
        push r12
        push r13
        push r14
        ; Low halves of xmm6-xmm11 are callee-saved in the Win64 ABI.
        sub rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        ; r14 = scalar counter, ymm10 = vector counter, ymm11 = vector of 1s,
        ; ymm9 = idx in every dword, rax = iterations left.
        mov r14, 1
        mov rax, 1
        movd xmm9, r8d
        add rdx, 144
        movd xmm11, eax
        mov rax, 64
        ; vpermd with an all-zero index vector broadcasts dword 0.
        vpxor ymm10, ymm10, ymm10
        vpermd ymm9, ymm10, ymm9
        vpermd ymm11, ymm10, ymm11
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        vpxor ymm2, ymm2, ymm2
        vpxor ymm3, ymm3, ymm3
        xor r10, r10
        xor r11, r11
        vmovdqa ymm10, ymm11
L_521_get_entry_64_avx2_9_start:
        ; ymm8 = vector mask (counter == idx); r9 = same mask as a scalar.
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd ymm10, ymm10, ymm11
        xor r9, r9
        cmp r8, r14
        sete r9b
        neg r9
        inc r14
        ; First half: bytes 0..63 + word at 64. Second half: 72..135 + 136.
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovupd ymm5, YMMWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+72]
        vmovupd ymm7, YMMWORD PTR [rdx+104]
        mov r12, QWORD PTR [rdx+64]
        mov r13, QWORD PTR [rdx+136]
        add rdx, 144
        vpand ymm4, ymm4, ymm8
        vpand ymm5, ymm5, ymm8
        vpand ymm6, ymm6, ymm8
        vpand ymm7, ymm7, ymm8
        and r12, r9
        and r13, r9
        vpor ymm0, ymm0, ymm4
        vpor ymm1, ymm1, ymm5
        vpor ymm2, ymm2, ymm6
        vpor ymm3, ymm3, ymm7
        or r10, r12
        or r11, r13
        dec rax
        jnz L_521_get_entry_64_avx2_9_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+32], ymm1
        vmovupd YMMWORD PTR [rcx+144], ymm2
        vmovupd YMMWORD PTR [rcx+176], ymm3
        mov QWORD PTR [rcx+64], r10
        mov QWORD PTR [rcx+208], r11
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add rsp, 96
        pop r14
        pop r13
        pop r12
        ret
sp_521_get_entry_64_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of entry to retrieve.
;  *
;  * Win64 registers: rcx = r, rdx = table, r8 = idx.
;  * Same masked scan as the 64-entry variant, but each pass visits 64 slots
;  * (slots 1..64). Slot 0 is never loaded.
;  */
_text SEGMENT READONLY PARA
sp_521_get_entry_65_9 PROC
        push r12
        ; xmm6-xmm15 are callee-saved in the Win64 ABI; all ten are spilled.
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15
        ; From entry 1
        mov r12, 1
        mov rax, 1
        movd xmm13, r8d
        add rdx, 144
        movd xmm15, eax
        mov rax, 64
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        xor r11, r11
        movdqa xmm14, xmm15
L_521_get_entry_65_9_start_0:
        ; xmm12 = vector mask (counter == idx); r9 = same mask as a scalar.
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor r9, r9
        cmp r8, r12
        sete r9b
        neg r9
        inc r12
        movdqu xmm4, OWORD PTR [rdx]
        movdqu xmm5, OWORD PTR [rdx+16]
        movdqu xmm6, OWORD PTR [rdx+32]
        movdqu xmm7, OWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+64]
        add rdx, 144
        pand xmm4, xmm12
        pand xmm5, xmm12
        pand xmm6, xmm12
        pand xmm7, xmm12
        and r10, r9
        por xmm0, xmm4
        por xmm1, xmm5
        por xmm2, xmm6
        por xmm3, xmm7
        or r11, r10
        dec rax
        jnz L_521_get_entry_65_9_start_0
        movdqu OWORD PTR [rcx], xmm0
        movdqu OWORD PTR [rcx+16], xmm1
        movdqu OWORD PTR [rcx+32], xmm2
        movdqu OWORD PTR [rcx+48], xmm3
        mov QWORD PTR [rcx+64], r11
        ; From entry 1
        mov r12, 1
        mov rax, 1
        movd xmm13, r8d
        ; Pass 0 advanced rdx by 64 * 144 = 9216; stepping back 9144 leaves
        ; rdx 72 bytes into slot 1, the start of the second 72-byte half.
        sub rdx, 9144
        movd xmm15, eax
        mov rax, 64
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        pxor xmm14, xmm14
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        xor r11, r11
        movdqa xmm14, xmm15
L_521_get_entry_65_9_start_1:
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor r9, r9
        cmp r8, r12
        sete r9b
        neg r9
        inc r12
        movdqu xmm4, OWORD PTR [rdx]
        movdqu xmm5, OWORD PTR [rdx+16]
        movdqu xmm6, OWORD PTR [rdx+32]
        movdqu xmm7, OWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+64]
        add rdx, 144
        pand xmm4, xmm12
        pand xmm5, xmm12
        pand xmm6, xmm12
        pand xmm7, xmm12
        and r10, r9
        por xmm0, xmm4
        por xmm1, xmm5
        por xmm2, xmm6
        por xmm3, xmm7
        or r11, r10
        dec rax
        jnz L_521_get_entry_65_9_start_1
        movdqu OWORD PTR [rcx+144], xmm0
        movdqu OWORD PTR [rcx+160], xmm1
        movdqu OWORD PTR [rcx+176], xmm2
        movdqu OWORD PTR [rcx+192], xmm3
        mov QWORD PTR [rcx+208], r11
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        pop r12
        ret
sp_521_get_entry_65_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of entry to retrieve.
;  */
; Win64 registers: rcx = r, rdx = table, r8 = idx.
; Single pass over 144-byte slots starting at slot 1: both 72-byte halves of
; each slot are loaded, masked with (counter == idx) and ORed into
; accumulators, so the access pattern does not depend on idx.
; Output: first half to r[0..71], second half to r[144..215].
; NOTE(review): the loop runs 65 times starting at slot 1, so the last
; iteration loads slot 65; confirm the table has a readable slot 65.
_text SEGMENT READONLY PARA
sp_521_get_entry_65_avx2_9 PROC
        push r12
        push r13
        push r14
        ; Low halves of xmm6-xmm11 are callee-saved in the Win64 ABI.
        sub rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        ; r14 = scalar counter, ymm10 = vector counter, ymm11 = vector of 1s,
        ; ymm9 = idx in every dword, rax = iterations left.
        mov r14, 1
        mov rax, 1
        movd xmm9, r8d
        add rdx, 144
        movd xmm11, eax
        mov rax, 65
        ; vpermd with an all-zero index vector broadcasts dword 0.
        vpxor ymm10, ymm10, ymm10
        vpermd ymm9, ymm10, ymm9
        vpermd ymm11, ymm10, ymm11
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        vpxor ymm2, ymm2, ymm2
        vpxor ymm3, ymm3, ymm3
        xor r10, r10
        xor r11, r11
        vmovdqa ymm10, ymm11
L_521_get_entry_65_avx2_9_start:
        ; ymm8 = vector mask (counter == idx); r9 = same mask as a scalar.
        vpcmpeqd ymm8, ymm10, ymm9
        vpaddd ymm10, ymm10, ymm11
        xor r9, r9
        cmp r8, r14
        sete r9b
        neg r9
        inc r14
        ; First half: bytes 0..63 + word at 64. Second half: 72..135 + 136.
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovupd ymm5, YMMWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+72]
        vmovupd ymm7, YMMWORD PTR [rdx+104]
        mov r12, QWORD PTR [rdx+64]
        mov r13, QWORD PTR [rdx+136]
        add rdx, 144
        vpand ymm4, ymm4, ymm8
        vpand ymm5, ymm5, ymm8
        vpand ymm6, ymm6, ymm8
        vpand ymm7, ymm7, ymm8
        and r12, r9
        and r13, r9
        vpor ymm0, ymm0, ymm4
        vpor ymm1, ymm1, ymm5
        vpor ymm2, ymm2, ymm6
        vpor ymm3, ymm3, ymm7
        or r10, r12
        or r11, r13
        dec rax
        jnz L_521_get_entry_65_avx2_9_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+32], ymm1
        vmovupd YMMWORD PTR [rcx+144], ymm2
        vmovupd YMMWORD PTR [rcx+176], ymm3
        mov QWORD PTR [rcx+64], r10
        mov QWORD PTR [rcx+208], r11
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add rsp, 96
        pop r14
        pop r13
        pop r12
        ret
sp_521_get_entry_65_avx2_9 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
;  *
;  * a  A single precision integer.
;  */
; Win64 register: rcx = a. The carry is rippled through all nine words in
; memory; any carry out of word 8 is discarded.
_text SEGMENT READONLY PARA
sp_521_add_one_9 PROC
        add QWORD PTR [rcx], 1
        adc QWORD PTR [rcx+8], 0
        adc QWORD PTR [rcx+16], 0
        adc QWORD PTR [rcx+24], 0
        adc QWORD PTR [rcx+32], 0
        adc QWORD PTR [rcx+40], 0
        adc QWORD PTR [rcx+48], 0
        adc QWORD PTR [rcx+56], 0
        adc QWORD PTR [rcx+64], 0
        ret
sp_521_add_one_9 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
;  * Uses the bswap instruction.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  *
;  * Win64 registers: rcx = r, rdx = size, r8 = a, r9 = n.
;  * size (rdx) is never read: the end of the zero-filled region is fixed at
;  * r + 66, which - with 8-byte stores - clears every word up to and
;  * including r[8]. The limit now matches sp_521_from_bin_movbe (it was 65
;  * here, which selects the same words but disagreed with the sibling).
;  * Bytes are consumed from the END of a (least significant first) in
;  * 64-byte and then 8-byte steps; the 1..7 remaining most significant bytes
;  * are read from the START of a and packed into one word.
;  */
_text SEGMENT READONLY PARA
sp_521_from_bin_bswap PROC
        push r12
        push r13
        ; r11 = a + n (one past the last byte), r12 = r + 66 (zero-fill
        ; limit), r13 = constant zero.
        mov r11, r8
        mov r12, rcx
        add r11, r9
        add r12, 66
        xor r13, r13
        jmp L_521_from_bin_bswap_64_end
L_521_from_bin_bswap_64_start:
        ; Convert eight big-endian words (64 bytes) per iteration.
        sub r11, 64
        mov rax, QWORD PTR [r11+56]
        mov r10, QWORD PTR [r11+48]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov rax, QWORD PTR [r11+40]
        mov r10, QWORD PTR [r11+32]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        mov rax, QWORD PTR [r11+24]
        mov r10, QWORD PTR [r11+16]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov rax, QWORD PTR [r11+8]
        mov r10, QWORD PTR [r11]
        bswap rax
        bswap r10
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
L_521_from_bin_bswap_64_end:
        ; Signed compare: loop while more than 63 bytes remain.
        cmp r9, 63
        jg L_521_from_bin_bswap_64_start
        jmp L_521_from_bin_bswap_8_end
L_521_from_bin_bswap_8_start:
        ; One word (8 bytes) per iteration.
        sub r11, 8
        mov rax, QWORD PTR [r11]
        bswap rax
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
L_521_from_bin_bswap_8_end:
        cmp r9, 7
        jg L_521_from_bin_bswap_8_start
        ; r9 is now the number of leading (most significant) bytes left.
        cmp r9, r13
        je L_521_from_bin_bswap_hi_end
        mov r10, r13
        mov rax, r13
L_521_from_bin_bswap_hi_start:
        ; r10 = (r10 << 8) + next byte, reading forward from a[0].
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_521_from_bin_bswap_hi_start
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_521_from_bin_bswap_hi_end:
        ; Zero any remaining words below the fixed limit.
        cmp rcx, r12
        jge L_521_from_bin_bswap_zero_end
L_521_from_bin_bswap_zero_start:
        mov QWORD PTR [rcx], r13
        add rcx, 8
        cmp rcx, r12
        jl L_521_from_bin_bswap_zero_start
L_521_from_bin_bswap_zero_end:
        pop r13
        pop r12
        ret
sp_521_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  *
;  * Win64 registers: rcx = r, rdx = size, r8 = a, r9 = n.
;  * Same algorithm as sp_521_from_bin_bswap, with movbe performing the
;  * load and byte swap in one instruction. size (rdx) is never read; the
;  * zero-fill limit is fixed at r + 66.
;  */
_text SEGMENT READONLY PARA
sp_521_from_bin_movbe PROC
        push r12
        ; r11 = a + n (one past the last byte), r12 = r + 66.
        mov r11, r8
        mov r12, rcx
        add r11, r9
        add r12, 66
        jmp L_521_from_bin_movbe_64_end
L_521_from_bin_movbe_64_start:
        ; Convert eight big-endian words (64 bytes) per iteration.
        sub r11, 64
        movbe rax, QWORD PTR [r11+56]
        movbe r10, QWORD PTR [r11+48]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        movbe rax, QWORD PTR [r11+40]
        movbe r10, QWORD PTR [r11+32]
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        movbe rax, QWORD PTR [r11+24]
        movbe r10, QWORD PTR [r11+16]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        movbe rax, QWORD PTR [r11+8]
        movbe r10, QWORD PTR [r11]
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        add rcx, 64
        sub r9, 64
L_521_from_bin_movbe_64_end:
        cmp r9, 63
        jg L_521_from_bin_movbe_64_start
        jmp L_521_from_bin_movbe_8_end
L_521_from_bin_movbe_8_start:
        ; One word (8 bytes) per iteration.
        sub r11, 8
        movbe rax, QWORD PTR [r11]
        mov QWORD PTR [rcx], rax
        add rcx, 8
        sub r9, 8
L_521_from_bin_movbe_8_end:
        cmp r9, 7
        jg L_521_from_bin_movbe_8_start
        ; r9 is now the number of leading (most significant) bytes left.
        cmp r9, 0
        je L_521_from_bin_movbe_hi_end
        mov r10, 0
        mov rax, 0
L_521_from_bin_movbe_hi_start:
        ; r10 = (r10 << 8) + next byte, reading forward from a[0].
        mov al, BYTE PTR [r8]
        shl r10, 8
        inc r8
        add r10, rax
        dec r9
        jg L_521_from_bin_movbe_hi_start
        mov QWORD PTR [rcx], r10
        add rcx, 8
L_521_from_bin_movbe_hi_end:
        ; Zero any remaining words below the fixed limit.
        cmp rcx, r12
        jge L_521_from_bin_movbe_zero_end
L_521_from_bin_movbe_zero_start:
        mov QWORD PTR [rcx], 0
        add rcx, 8
        cmp rcx, r12
        jl L_521_from_bin_movbe_zero_start
L_521_from_bin_movbe_zero_end:
        pop r12
        ret
sp_521_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 66
;  * Uses the bswap instruction.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
; Win64 registers: rcx = r, rdx = a.
; Writes 66 bytes: a[0], a[1] come from bytes 65 and 64 of r (the low two
; bytes of word 8), followed by words 7..0 of r, each byte-swapped.
_text SEGMENT READONLY PARA
sp_521_to_bin_bswap_9 PROC
        ; Top two bytes, most significant first.
        mov r8b, BYTE PTR [rcx+64]
        mov al, BYTE PTR [rcx+65]
        mov BYTE PTR [rdx], al
        mov BYTE PTR [rdx+1], r8b
        ; Words 7 and 6 -> a[2..17].
        mov rax, QWORD PTR [rcx+56]
        mov r8, QWORD PTR [rcx+48]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+2], rax
        mov QWORD PTR [rdx+10], r8
        ; Words 5 and 4 -> a[18..33].
        mov rax, QWORD PTR [rcx+40]
        mov r8, QWORD PTR [rcx+32]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+18], rax
        mov QWORD PTR [rdx+26], r8
        ; Words 3 and 2 -> a[34..49].
        mov rax, QWORD PTR [rcx+24]
        mov r8, QWORD PTR [rcx+16]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+34], rax
        mov QWORD PTR [rdx+42], r8
        ; Words 1 and 0 -> a[50..65].
        mov rax, QWORD PTR [rcx+8]
        mov r8, QWORD PTR [rcx]
        bswap rax
        bswap r8
        mov QWORD PTR [rdx+50], rax
        mov QWORD PTR [rdx+58], r8
        ret
sp_521_to_bin_bswap_9 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 66
;  * Uses the movbe instruction which is optional.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  *
;  * Win64 registers: rcx = r, rdx = a.
;  * Same layout as sp_521_to_bin_bswap_9; movbe loads and byte-swaps in one
;  * instruction.
;  */
_text SEGMENT READONLY PARA
sp_521_to_bin_movbe_9 PROC
        ; Top two bytes, most significant first.
        mov r8b, BYTE PTR [rcx+64]
        mov al, BYTE PTR [rcx+65]
        mov BYTE PTR [rdx], al
        mov BYTE PTR [rdx+1], r8b
        ; Words 7 and 6 -> a[2..17].
        movbe rax, QWORD PTR [rcx+56]
        movbe r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rdx+2], rax
        mov QWORD PTR [rdx+10], r8
        ; Words 5 and 4 -> a[18..33].
        movbe rax, QWORD PTR [rcx+40]
        movbe r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rdx+18], rax
        mov QWORD PTR [rdx+26], r8
        ; Words 3 and 2 -> a[34..49].
        movbe rax, QWORD PTR [rcx+24]
        movbe r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rdx+34], rax
        mov QWORD PTR [rdx+42], r8
        ; Words 1 and 0 -> a[50..65].
        movbe rax, QWORD PTR [rcx+8]
        movbe r8, QWORD PTR [rcx]
        mov QWORD PTR [rdx+50], rax
        mov QWORD PTR [rdx+58], r8
        ret
sp_521_to_bin_movbe_9 ENDP
_text ENDS
ENDIF
; /* Shift number right by n bits. (r = a >> n)
;  *
;  * r  Result of right shift by n.
;  * a  Number to shift.
;  * n  Number of bits to shift by (taken from the third argument).
;  */
; Win64 registers: rcx = r, rdx = a, r8 = shift count.
; The count is moved into rcx so that shrd/shr can use cl; r is kept in rax.
; Nine words are shifted as one value: each shrd pulls the low bits of the
; next-higher word into the top of the current one.
_text SEGMENT READONLY PARA
sp_521_rshift_9 PROC
        push r12
        mov rax, rcx
        mov rcx, r8
        ; Words 0..3 (word 4 is loaded to supply bits for word 3 and is
        ; carried in r12 into the second half).
        mov r8, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        shrd r8, r9, cl
        shrd r9, r10, cl
        shrd r10, r11, cl
        shrd r11, r12, cl
        mov QWORD PTR [rax], r8
        mov QWORD PTR [rax+8], r9
        mov QWORD PTR [rax+16], r10
        mov QWORD PTR [rax+24], r11
        ; Words 4..8; r8 now holds the top word.
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [rdx+48]
        mov r11, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [rdx+64]
        shrd r12, r9, cl
        shrd r9, r10, cl
        shrd r10, r11, cl
        shrd r11, r8, cl
        mov QWORD PTR [rax+32], r12
        mov QWORD PTR [rax+40], r9
        mov QWORD PTR [rax+48], r10
        mov QWORD PTR [rax+56], r11
        ; Top word is shifted with zero fill.
        shr r8, cl
        mov QWORD PTR [rax+64], r8
        pop r12
        ret
sp_521_rshift_9 ENDP
_text ENDS
; /* Shift number left by n bit. (r = a << n)
;  *
;  * r  Result of left shift by n.
;  * a  Number to shift.
;  * n  Amount to shift.
;  *
;  * Win64 registers: rcx = r, rdx = a, r8b = n (moved into cl; r is kept
;  * in rax). Reads nine words of a and writes ten words of r: the bits
;  * shifted out of word 8 are stored at r[9] (byte offset 72).
;  * Words are processed from the top down so that each shld can pull bits
;  * from the not-yet-shifted lower neighbour.
;  */
_text SEGMENT READONLY PARA
sp_521_lshift_9 PROC
        push r12
        push r13
        mov rax, rcx
        mov cl, r8b
        ; r12 collects the overflow word r[9].
        mov r12, 0
        ; Words 4..8 (word 4 only supplies bits here; it is finished in the
        ; second half via r13).
        mov r13, QWORD PTR [rdx+32]
        mov r8, QWORD PTR [rdx+40]
        mov r9, QWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+56]
        mov r11, QWORD PTR [rdx+64]
        shld r12, r11, cl
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+40], r8
        mov QWORD PTR [rax+48], r9
        mov QWORD PTR [rax+56], r10
        mov QWORD PTR [rax+64], r11
        mov QWORD PTR [rax+72], r12
        ; Words 0..4.
        mov r11, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+8], r8
        mov QWORD PTR [rax+16], r9
        mov QWORD PTR [rax+24], r10
        mov QWORD PTR [rax+32], r13
        ; Bottom word is shifted with zero fill.
        shl r11, cl
        mov QWORD PTR [rax], r11
        pop r13
        pop r12
        ret
sp_521_lshift_9 ENDP
_text ENDS
; /* Shift number left by n bit. (r = a << n)
;  *
;  * r  Result of left shift by n.
;  * a  Number to shift.
;  * n  Amount to shift.
;  */
; Win64 registers: rcx = r, rdx = a, r8b = n (moved into cl; r is kept in
; rax). Reads 18 words of a and writes 19 words of r: the bits shifted out
; of word 17 are stored at r[18] (byte offset 144).
; Words are processed from the top down in groups of four; the lowest word
; loaded in each group is carried (alternately in r13 and r11) into the next
; group, where it receives its bits from the word below.
_text SEGMENT READONLY PARA
sp_521_lshift_18 PROC
        push r12
        push r13
        mov rax, rcx
        mov cl, r8b
        ; r12 collects the overflow word r[18].
        mov r12, 0
        ; Words 14..17 and overflow; word 13 carried in r13.
        mov r13, QWORD PTR [rdx+104]
        mov r8, QWORD PTR [rdx+112]
        mov r9, QWORD PTR [rdx+120]
        mov r10, QWORD PTR [rdx+128]
        mov r11, QWORD PTR [rdx+136]
        shld r12, r11, cl
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+112], r8
        mov QWORD PTR [rax+120], r9
        mov QWORD PTR [rax+128], r10
        mov QWORD PTR [rax+136], r11
        mov QWORD PTR [rax+144], r12
        ; Words 10..13; word 9 carried in r11.
        mov r11, QWORD PTR [rdx+72]
        mov r8, QWORD PTR [rdx+80]
        mov r9, QWORD PTR [rdx+88]
        mov r10, QWORD PTR [rdx+96]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+80], r8
        mov QWORD PTR [rax+88], r9
        mov QWORD PTR [rax+96], r10
        mov QWORD PTR [rax+104], r13
        ; Words 6..9; word 5 carried in r13.
        mov r13, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [rdx+48]
        mov r9, QWORD PTR [rdx+56]
        mov r10, QWORD PTR [rdx+64]
        shld r11, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r13, cl
        mov QWORD PTR [rax+48], r8
        mov QWORD PTR [rax+56], r9
        mov QWORD PTR [rax+64], r10
        mov QWORD PTR [rax+72], r11
        ; Words 2..5; word 1 carried in r11.
        mov r11, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rdx+24]
        mov r10, QWORD PTR [rdx+32]
        shld r13, r10, cl
        shld r10, r9, cl
        shld r9, r8, cl
        shld r8, r11, cl
        mov QWORD PTR [rax+16], r8
        mov QWORD PTR [rax+24], r9
        mov QWORD PTR [rax+32], r10
        mov QWORD PTR [rax+40], r13
        ; Words 0..1; the bottom word is shifted with zero fill.
        mov r10, QWORD PTR [rdx]
        shld r11, r10, cl
        shl r10, cl
        mov QWORD PTR [rax], r10
        mov QWORD PTR [rax+8], r11
        pop r13
        pop r12
        ret
sp_521_lshift_18 ENDP
_text ENDS
; /* Sub b from a into a. (a -= b)
;  *
;  * a  A single precision integer and result.
;  * b  A single precision integer.
;  */
; Win64 registers: rcx = a (updated in place), rdx = b.
; Returns in rax: 0 when there is no borrow out of word 8, all ones (-1)
; when there is. Stores are interleaved with the sub/sbb chain; mov does not
; alter the carry flag, so the borrow survives between steps.
_text SEGMENT READONLY PARA
sp_521_sub_in_place_9 PROC
        mov r8, QWORD PTR [rcx]
        sub r8, QWORD PTR [rdx]
        mov r9, QWORD PTR [rcx+8]
        mov QWORD PTR [rcx], r8
        sbb r9, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rcx+8], r9
        sbb r8, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rcx+24]
        mov QWORD PTR [rcx+16], r8
        sbb r9, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rcx+24], r9
        sbb r8, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rcx+40]
        mov QWORD PTR [rcx+32], r8
        sbb r9, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rcx+40], r9
        sbb r8, QWORD PTR [rdx+48]
        mov r9, QWORD PTR [rcx+56]
        mov QWORD PTR [rcx+48], r8
        sbb r9, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [rcx+64]
        mov QWORD PTR [rcx+56], r9
        sbb r8, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+64], r8
        ; rax = 0 - borrow: 0 or -1.
        sbb rax, rax
        ret
sp_521_sub_in_place_9 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision digit.
;  *
;  * Win64 registers: rcx = r, rdx = a, r8 = b.
;  * a is copied to r9 because mul writes its 128-bit product to rdx:rax.
;  * Reads nine words of a and writes ten words of r (r[9] at offset 72).
;  * r10, r11 and r12 rotate as a three-word accumulator window.
;  */
_text SEGMENT READONLY PARA
sp_521_mul_d_9 PROC
        push r12
        mov r9, rdx
        ; A[0] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9]
        mov r10, rax
        mov r11, rdx
        mov QWORD PTR [rcx], r10
        ; A[1] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+8]
        add r11, rax
        mov QWORD PTR [rcx+8], r11
        adc r12, rdx
        adc r10, 0
        ; A[2] * B
        mov rax, r8
        xor r11, r11
        mul QWORD PTR [r9+16]
        add r12, rax
        mov QWORD PTR [rcx+16], r12
        adc r10, rdx
        adc r11, 0
        ; A[3] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9+24]
        add r10, rax
        mov QWORD PTR [rcx+24], r10
        adc r11, rdx
        adc r12, 0
        ; A[4] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+32]
        add r11, rax
        mov QWORD PTR [rcx+32], r11
        adc r12, rdx
        adc r10, 0
        ; A[5] * B
        mov rax, r8
        xor r11, r11
        mul QWORD PTR [r9+40]
        add r12, rax
        mov QWORD PTR [rcx+40], r12
        adc r10, rdx
        adc r11, 0
        ; A[6] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9+48]
        add r10, rax
        mov QWORD PTR [rcx+48], r10
        adc r11, rdx
        adc r12, 0
        ; A[7] * B
        mov rax, r8
        xor r10, r10
        mul QWORD PTR [r9+56]
        add r11, rax
        mov QWORD PTR [rcx+56], r11
        adc r12, rdx
        adc r10, 0
        ; A[8] * B
        mov rax, r8
        mul QWORD PTR [r9+64]
        add r12, rax
        adc r10, rdx
        mov QWORD PTR [rcx+64], r12
        mov QWORD PTR [rcx+72], r10
        pop r12
        ret
sp_521_mul_d_9 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision digit.
;  *
;  * Win64 registers: rcx = r, rdx = a, r8 = b.
;  * a is moved to rax and b into rdx, the implicit multiplicand of mulx.
;  * r13 is a constant zero; "xor r13, r13" also clears CF and OF before the
;  * adcx (CF chain: low product halves) and adox (OF chain: high halves)
;  * sequences start. Writes ten words of r (r[9] at offset 72).
;  */
_text SEGMENT READONLY PARA
sp_521_mul_d_avx2_9 PROC
        push r12
        push r13
        mov rax, rdx
        ; A[0] * B
        mov rdx, r8
        xor r13, r13
        mulx r12, r11, QWORD PTR [rax]
        mov QWORD PTR [rcx], r11
        ; A[1] * B
        mulx r10, r9, QWORD PTR [rax+8]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+8], r12
        ; A[2] * B
        mulx r10, r9, QWORD PTR [rax+16]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+16], r11
        ; A[3] * B
        mulx r10, r9, QWORD PTR [rax+24]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+24], r12
        ; A[4] * B
        mulx r10, r9, QWORD PTR [rax+32]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+32], r11
        ; A[5] * B
        mulx r10, r9, QWORD PTR [rax+40]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+40], r12
        ; A[6] * B
        mulx r10, r9, QWORD PTR [rax+48]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        mov QWORD PTR [rcx+48], r11
        ; A[7] * B
        mulx r10, r9, QWORD PTR [rax+56]
        mov r11, r13
        adcx r12, r9
        adox r11, r10
        mov QWORD PTR [rcx+56], r12
        ; A[8] * B
        mulx r10, r9, QWORD PTR [rax+64]
        mov r12, r13
        adcx r11, r9
        adox r12, r10
        ; Fold the final CF into the top word.
        adcx r12, r13
        mov QWORD PTR [rcx+64], r11
        mov QWORD PTR [rcx+72], r12
        pop r13
        pop r12
        ret
sp_521_mul_d_avx2_9 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * d1   The high order half of the number to divide.
;  * d0   The low order half of the number to divide.
;  * div  The divisor.
;  * returns the result of the division.
;  *
;  * Win64 registers: rcx = d1, rdx = d0, r8 = div. The quotient is returned
;  * in rax. The div instruction raises a divide error if div is zero or the
;  * quotient does not fit in 64 bits (i.e. when d1 >= div).
;  */
_text SEGMENT READONLY PARA
div_521_word_asm_9 PROC
        ; Set up rdx:rax = d1:d0 for the 128-by-64-bit divide.
        mov r9, rdx
        mov rax, r9
        mov rdx, rcx
        div r8
        ret
div_521_word_asm_9 ENDP
_text ENDS
ENDIF
; /* Shift number right by 1 bit.
(r = a >> 1)
;  *
;  * r  Result of right shift by 1.
;  * a  Number to shift.
;  *
;  * Win64 registers: rcx = r, rdx = a. Nine words are shifted as one value.
;  */
_text SEGMENT READONLY PARA
sp_521_rshift1_9 PROC
        push r12
        ; Words 0..3 (word 4 supplies bits for word 3 and is carried in r12).
        mov rax, QWORD PTR [rdx]
        mov r8, QWORD PTR [rdx+8]
        mov r9, QWORD PTR [rdx+16]
        mov r10, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        shrd rax, r8, 1
        shrd r8, r9, 1
        shrd r9, r10, 1
        shrd r10, r12, 1
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r8
        mov QWORD PTR [rcx+16], r9
        mov QWORD PTR [rcx+24], r10
        ; Words 4..8; rax now holds the top word.
        mov r8, QWORD PTR [rdx+40]
        mov r9, QWORD PTR [rdx+48]
        mov r10, QWORD PTR [rdx+56]
        mov rax, QWORD PTR [rdx+64]
        shrd r12, r8, 1
        shrd r8, r9, 1
        shrd r9, r10, 1
        shrd r10, rax, 1
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r8
        mov QWORD PTR [rcx+48], r9
        mov QWORD PTR [rcx+56], r10
        shr rax, 1
        mov QWORD PTR [rcx+64], rax
        pop r12
        ret
sp_521_rshift1_9 ENDP
_text ENDS
; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
;  *
;  * r  Result of division by 2.
;  * a  Number to divide.
;  * m  Modulus
;  *
;  * Win64 registers: rcx = r, rdx = a, r8 = m.
;  * When a is odd, a + m is first written to r; the nine-word value is then
;  * shifted right by one bit into r. The shift source is r in the odd case
;  * and a in the even case, so the result is correct whether or not r and a
;  * are the same buffer. (Previously the shift always re-read a, which
;  * dropped the "+ m" step whenever r != a.)
;  * Any carry out of word 8 of a + m is discarded.
;  * NOTE: this routine branches on the low bit of a.
;  */
_text SEGMENT READONLY PARA
sp_521_div2_mod_9 PROC
        push r12
        ; Even input: nothing to add, shift a directly.
        mov rax, QWORD PTR [rdx]
        and rax, 1
        je L_521_mod_inv_9_div2_mod_no_add
        ; Odd input: r = a + m, two words at a time along one carry chain
        ; (mov does not alter the carry flag).
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        add rax, r10
        adc r9, r11
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov rax, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rdx+24]
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        adc rax, r10
        adc r9, r11
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r9
        mov rax, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        adc rax, r10
        adc r9, r11
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov rax, QWORD PTR [rdx+48]
        mov r9, QWORD PTR [rdx+56]
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        adc rax, r10
        adc r9, r11
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r9
        mov rax, QWORD PTR [rdx+64]
        mov r10, QWORD PTR [r8+64]
        adc rax, r10
        mov QWORD PTR [rcx+64], rax
        ; The sum now lives in r: make it the source of the shift below.
        ; This is a no-op when the caller passed r == a.
        mov rdx, rcx
L_521_mod_inv_9_div2_mod_no_add:
        ; Shift the nine-word value at [rdx] right by one bit into r.
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        mov r12, QWORD PTR [rdx+32]
        shrd rax, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, r12, 1
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [rdx+48]
        mov r11, QWORD PTR [rdx+56]
        mov rax, QWORD PTR [rdx+64]
        shrd r12, r9, 1
        shrd r9, r10, 1
        shrd r10, r11, 1
        shrd r11, rax, 1
        mov QWORD PTR [rcx+32], r12
        mov QWORD PTR [rcx+40], r9
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        shr rax, 1
        mov QWORD PTR [rcx+64], rax
        pop r12
        ret
sp_521_div2_mod_9 ENDP
_text ENDS
_text SEGMENT READONLY PARA sp_521_num_bits_9 PROC xor rax, rax mov rdx, QWORD PTR [rcx+64] cmp rdx, 0 je L_521_num_bits_9_end_512 mov rax, -1 bsr rax, rdx add rax, 513 jmp L_521_num_bits_9_done L_521_num_bits_9_end_512: mov rdx, QWORD PTR [rcx+56] cmp rdx, 0 je L_521_num_bits_9_end_448 mov rax, -1 bsr rax, rdx add rax, 449 jmp L_521_num_bits_9_done L_521_num_bits_9_end_448: mov rdx, QWORD PTR [rcx+48] cmp rdx, 0 je L_521_num_bits_9_end_384 mov rax, -1 bsr rax, rdx add rax, 385 jmp L_521_num_bits_9_done L_521_num_bits_9_end_384: mov rdx, QWORD PTR [rcx+40] cmp rdx, 0 je L_521_num_bits_9_end_320 mov rax, -1 bsr rax, rdx add rax, 321 jmp L_521_num_bits_9_done L_521_num_bits_9_end_320: mov rdx, QWORD PTR [rcx+32] cmp rdx, 0 je L_521_num_bits_9_end_256 mov rax, -1 bsr rax, rdx add rax, 257 jmp L_521_num_bits_9_done L_521_num_bits_9_end_256: mov rdx, QWORD PTR [rcx+24] cmp rdx, 0 je L_521_num_bits_9_end_192 mov rax, -1 bsr rax, rdx add rax, 193 jmp L_521_num_bits_9_done L_521_num_bits_9_end_192: mov rdx, QWORD PTR [rcx+16] cmp rdx, 0 je L_521_num_bits_9_end_128 mov rax, -1 bsr rax, rdx add rax, 129 jmp L_521_num_bits_9_done L_521_num_bits_9_end_128: mov rdx, QWORD PTR [rcx+8] cmp rdx, 0 je L_521_num_bits_9_end_64 mov rax, -1 bsr rax, rdx add rax, 65 jmp L_521_num_bits_9_done L_521_num_bits_9_end_64: mov rdx, QWORD PTR [rcx] cmp rdx, 0 je
L_521_num_bits_9_end_0 mov rax, -1 bsr rax, rdx add rax, 1 jmp L_521_num_bits_9_done L_521_num_bits_9_end_0: L_521_num_bits_9_done: ret sp_521_num_bits_9 ENDP _text ENDS ENDIF IFDEF WOLFSSL_SP_1024 ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ _text SEGMENT READONLY PARA sp_1024_mul_16 PROC push r12 mov r9, rdx sub rsp, 128 ; A[0] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9] xor r12, r12 mov QWORD PTR [rsp], rax mov r11, rdx ; A[0] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+8], r11 ; A[0] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+16], r12 ; A[0] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+24], r10 ; A[0] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR 
[r9+32] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+32], r11 ; A[0] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+40], r12 ; A[0] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+48], r10 ; A[0] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD 
PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+56], r11 ; A[0] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+64], r12 ; A[0] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+56] add r10, rax adc r11, 
rdx adc r12, 0 ; A[8] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+72], r10 ; A[0] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+80], r11 ; A[0] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[5] 
mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+88], r12 ; A[0] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+96], r10 ; A[0] * B[13] mov rax, QWORD PTR [r8+104] mul 
QWORD PTR [r9] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[1] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+8] add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rsp+104], r11 ; A[0] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[1] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+8] add r12, rax adc r10, rdx adc r11, 0 ; A[2] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+16] add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+40] add r12, rax adc r10, 
rdx adc r11, 0 ; A[6] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+112], r12 ; A[0] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[1] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+8] add r10, rax adc r11, rdx adc r12, 0 ; A[2] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+16] add r10, rax adc r11, rdx adc r12, 0 ; A[3] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+24] add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[5] mov rax, QWORD PTR 
[r8+40] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[0] mov rax, QWORD PTR [r8] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rsp+120], r10 ; A[1] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+8] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[2] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+16] add r11, rax adc r12, rdx adc r10, 0 ; A[3] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+24] add r11, rax adc r12, rdx adc r10, 0 ; A[4] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+32] add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+112] add r11, 
rax adc r12, rdx adc r10, 0 ; A[15] * B[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+128], r11 ; A[2] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+16] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[3] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+24] add r12, rax adc r10, rdx adc r11, 0 ; A[4] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+32] add r12, rax adc r10, rdx adc r11, 0 ; A[5] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+40] add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+136], r12 ; A[3] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+24] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[4] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+32] add r10, rax adc r11, rdx adc r12, 0 ; A[5] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+40] add r10, rax adc r11, rdx adc r12, 0 ; A[6] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR 
[r9+48] add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+144], r10 ; A[4] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+32] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[5] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+40] add r11, rax adc r12, rdx adc r10, 0 ; A[6] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+48] add r11, rax adc r12, rdx adc r10, 0 ; A[7] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+56] add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+104] add r11, rax adc r12, 
rdx adc r10, 0 ; A[14] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+152], r11 ; A[5] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+40] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[6] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+48] add r12, rax adc r10, rdx adc r11, 0 ; A[7] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+56] add r12, rax adc r10, rdx adc r11, 0 ; A[8] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+64] add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+160], r12 ; A[6] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+48] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[7] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+56] add r10, rax adc r11, rdx adc r12, 0 ; A[8] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+64] add r10, rax adc r11, rdx adc r12, 0 ; A[9] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+72] add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+88] add 
r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+168], r10 ; A[7] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+56] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[8] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+64] add r11, rax adc r12, rdx adc r10, 0 ; A[9] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+72] add r11, rax adc r12, rdx adc r10, 0 ; A[10] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+80] add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+176], r11 ; A[8] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+64] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[9] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+72] add r12, rax adc r10, rdx adc r11, 0 ; A[10] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+80] add r12, rax adc r10, rdx adc r11, 0 ; A[11] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+88] add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[10] mov rax, QWORD PTR [r8+80] mul 
QWORD PTR [r9+104] add r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+184], r12 ; A[9] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+72] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[10] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+80] add r10, rax adc r11, rdx adc r12, 0 ; A[11] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+88] add r10, rax adc r11, rdx adc r12, 0 ; A[12] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+96] add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+192], r10 ; A[10] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+80] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[11] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+88] add r11, rax adc r12, rdx adc r10, 0 ; A[12] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+96] add r11, rax adc r12, rdx adc r10, 0 ; A[13] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+104] add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+200], r11 ; A[11] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+88] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[12] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+96] add r12, rax adc r10, rdx adc r11, 0 ; A[13] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+104] add 
r12, rax adc r10, rdx adc r11, 0 ; A[14] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+112] add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+208], r12 ; A[12] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+96] xor r12, r12 add r10, rax adc r11, rdx adc r12, 0 ; A[13] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+104] add r10, rax adc r11, rdx adc r12, 0 ; A[14] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+112] add r10, rax adc r11, rdx adc r12, 0 ; A[15] * B[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx adc r12, 0 mov QWORD PTR [rcx+216], r10 ; A[13] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+104] xor r10, r10 add r11, rax adc r12, rdx adc r10, 0 ; A[14] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+112] add r11, rax adc r12, rdx adc r10, 0 ; A[15] * B[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r9+120] add r11, rax adc r12, rdx adc r10, 0 mov QWORD PTR [rcx+224], r11 ; A[14] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+112] xor r11, r11 add r12, rax adc r10, rdx adc r11, 0 ; A[15] * B[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r9+120] add r12, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+232], r12 ; A[15] * B[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r9+120] add r10, rax adc r11, rdx mov QWORD PTR [rcx+240], r10 mov QWORD PTR [rcx+248], r11 mov rax, QWORD PTR [rsp] mov rdx, QWORD PTR [rsp+8] mov r10, QWORD PTR [rsp+16] mov r11, QWORD PTR [rsp+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], rdx mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rsp+32] mov rdx, QWORD PTR [rsp+40] mov r10, QWORD PTR [rsp+48] mov r11, QWORD PTR [rsp+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], rdx mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rsp+64] mov rdx, QWORD PTR [rsp+72] mov r10, QWORD PTR 
[rsp+80] mov r11, QWORD PTR [rsp+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], rdx mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [rsp+96] mov rdx, QWORD PTR [rsp+104] mov r10, QWORD PTR [rsp+112] mov r11, QWORD PTR [rsp+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], rdx mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 add rsp, 128 pop r12 ret sp_1024_mul_16 ENDP _text ENDS ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ _text SEGMENT READONLY PARA sp_1024_sqr_16 PROC push r12 push r13 push r14 mov r8, rdx sub rsp, 128 ; A[0] * A[0] mov rax, QWORD PTR [r8] mul rax xor r11, r11 mov QWORD PTR [rsp], rax mov r10, rdx ; A[0] * A[1] mov rax, QWORD PTR [r8+8] mul QWORD PTR [r8] xor r9, r9 add r10, rax adc r11, rdx adc r9, 0 add r10, rax adc r11, rdx adc r9, 0 mov QWORD PTR [rsp+8], r10 ; A[0] * A[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r8] xor r10, r10 add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 ; A[1] * A[1] mov rax, QWORD PTR [r8+8] mul rax add r11, rax adc r9, rdx adc r10, 0 mov QWORD PTR [rsp+16], r11 ; A[0] * A[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r8] xor r11, r11 add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 ; A[1] * A[2] mov rax, QWORD PTR [r8+16] mul QWORD PTR [r8+8] add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rsp+24], r9 ; A[0] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8] xor r9, r9 add r10, rax adc r11, rdx adc r9, 0 add r10, rax adc r11, rdx adc r9, 0 ; A[1] * A[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r8+8] add r10, rax adc r11, rdx adc r9, 0 add r10, rax adc r11, rdx adc r9, 0 ; A[2] * A[2] mov rax, QWORD PTR [r8+16] mul rax add r10, rax adc r11, rdx adc r9, 0 mov QWORD PTR [rsp+32], r10 ; A[0] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * 
A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[3] mov rax, QWORD PTR [r8+24] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+40], r11 ; A[0] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[3] mov rax, QWORD PTR [r8+24] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+48], r9 ; A[0] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[4] mov rax, QWORD PTR [r8+32] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+56], r10 ; A[0] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[4] mov rax, QWORD PTR [r8+32] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+64], r11 ; A[0] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 
mov r12, rax mov r13, rdx ; A[1] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[5] mov rax, QWORD PTR [r8+40] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+72], r9 ; A[0] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[5] mov rax, QWORD PTR [r8+40] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+80], r10 ; A[0] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[6] mov rax, QWORD PTR [r8+48] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+88], r11 ; A[0] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD 
PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[6] mov rax, QWORD PTR [r8+48] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+96], r9 ; A[0] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[7] mov rax, QWORD PTR [r8+56] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rsp+104], r10 ; A[0] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * 
A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[7] mov rax, QWORD PTR [r8+56] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rsp+112], r11 ; A[0] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[1] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+8] add r12, rax adc r13, rdx adc r14, 0 ; A[2] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[8] mov rax, QWORD PTR [r8+64] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rsp+120], r9 ; A[1] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+8] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[2] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+16] add r12, rax adc r13, rdx adc r14, 0 ; A[3] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+48] add 
r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[8] mov rax, QWORD PTR [r8+64] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+128], r10 ; A[2] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+16] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[3] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+24] add r12, rax adc r13, rdx adc r14, 0 ; A[4] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[9] mov rax, QWORD PTR [r8+72] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+136], r11 ; A[3] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+24] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[4] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+32] add r12, rax adc r13, rdx adc r14, 0 ; A[5] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[9] mov rax, QWORD PTR [r8+72] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rcx+144], r9 ; A[4] * A[15] mov rax, QWORD 
PTR [r8+120] mul QWORD PTR [r8+32] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[5] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+40] add r12, rax adc r13, rdx adc r14, 0 ; A[6] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[10] mov rax, QWORD PTR [r8+80] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+152], r10 ; A[5] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+40] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[6] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+48] add r12, rax adc r13, rdx adc r14, 0 ; A[7] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[10] mov rax, QWORD PTR [r8+80] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+160], r11 ; A[6] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+48] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[7] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+56] add r12, rax adc r13, rdx adc r14, 0 ; A[8] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[11] mov rax, QWORD PTR [r8+88] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r9, r12 adc r10, 
r13 adc r11, r14 mov QWORD PTR [rcx+168], r9 ; A[7] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+56] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[8] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+64] add r12, rax adc r13, rdx adc r14, 0 ; A[9] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[11] mov rax, QWORD PTR [r8+88] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+176], r10 ; A[8] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+64] xor r10, r10 xor r14, r14 mov r12, rax mov r13, rdx ; A[9] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+72] add r12, rax adc r13, rdx adc r14, 0 ; A[10] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[12] mov rax, QWORD PTR [r8+96] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r11, r12 adc r9, r13 adc r10, r14 mov QWORD PTR [rcx+184], r11 ; A[9] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+72] xor r11, r11 xor r14, r14 mov r12, rax mov r13, rdx ; A[10] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+80] add r12, rax adc r13, rdx adc r14, 0 ; A[11] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 ; A[12] * A[12] mov rax, QWORD PTR [r8+96] mul rax add r12, r12 adc r13, r13 adc r14, r14 add r12, rax adc r13, rdx adc r14, 0 add r9, r12 adc r10, r13 adc r11, r14 mov QWORD PTR [rcx+192], r9 ; A[10] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+80] xor r9, r9 xor r14, r14 mov r12, rax mov r13, rdx ; A[11] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+88] add r12, rax adc r13, rdx adc r14, 0 ; A[12] * A[13] mov rax, QWORD PTR [r8+104] mul QWORD PTR [r8+96] add r12, 
rax adc r13, rdx adc r14, 0 add r12, r12 adc r13, r13 adc r14, r14 add r10, r12 adc r11, r13 adc r9, r14 mov QWORD PTR [rcx+200], r10 ; A[11] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+88] xor r10, r10 add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 ; A[12] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+96] add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 ; A[13] * A[13] mov rax, QWORD PTR [r8+104] mul rax add r11, rax adc r9, rdx adc r10, 0 mov QWORD PTR [rcx+208], r11 ; A[12] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+96] xor r11, r11 add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 ; A[13] * A[14] mov rax, QWORD PTR [r8+112] mul QWORD PTR [r8+104] add r9, rax adc r10, rdx adc r11, 0 add r9, rax adc r10, rdx adc r11, 0 mov QWORD PTR [rcx+216], r9 ; A[13] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+104] xor r9, r9 add r10, rax adc r11, rdx adc r9, 0 add r10, rax adc r11, rdx adc r9, 0 ; A[14] * A[14] mov rax, QWORD PTR [r8+112] mul rax add r10, rax adc r11, rdx adc r9, 0 mov QWORD PTR [rcx+224], r10 ; A[14] * A[15] mov rax, QWORD PTR [r8+120] mul QWORD PTR [r8+112] xor r10, r10 add r11, rax adc r9, rdx adc r10, 0 add r11, rax adc r9, rdx adc r10, 0 mov QWORD PTR [rcx+232], r11 ; A[15] * A[15] mov rax, QWORD PTR [r8+120] mul rax add r9, rax adc r10, rdx mov QWORD PTR [rcx+240], r9 mov QWORD PTR [rcx+248], r10 mov rax, QWORD PTR [rsp] mov rdx, QWORD PTR [rsp+8] mov r12, QWORD PTR [rsp+16] mov r13, QWORD PTR [rsp+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], rdx mov QWORD PTR [rcx+16], r12 mov QWORD PTR [rcx+24], r13 mov rax, QWORD PTR [rsp+32] mov rdx, QWORD PTR [rsp+40] mov r12, QWORD PTR [rsp+48] mov r13, QWORD PTR [rsp+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], rdx mov QWORD PTR [rcx+48], r12 mov QWORD PTR [rcx+56], r13 mov rax, QWORD PTR [rsp+64] mov rdx, QWORD PTR [rsp+72] mov r12, QWORD PTR [rsp+80] mov r13, QWORD PTR [rsp+88] mov 
QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], rdx mov QWORD PTR [rcx+80], r12 mov QWORD PTR [rcx+88], r13 mov rax, QWORD PTR [rsp+96] mov rdx, QWORD PTR [rsp+104] mov r12, QWORD PTR [rsp+112] mov r13, QWORD PTR [rsp+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], rdx mov QWORD PTR [rcx+112], r12 mov QWORD PTR [rcx+120], r13 add rsp, 128 pop r14 pop r13 pop r12 ret sp_1024_sqr_16 ENDP _text ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * ; * r Result of multiplication. ; * a First number to multiply. ; * b Second number to multiply. ; */ _text SEGMENT READONLY PARA sp_1024_mul_avx2_16 PROC push rbx push rbp push r12 push r13 push r14 push r15 push rdi mov rbp, r8 mov r8, rcx mov r9, rdx sub rsp, 128 cmp r9, r8 mov rbx, rsp cmovne rbx, r8 cmp rbp, r8 cmove rbx, rsp add r8, 128 xor rdi, rdi mov rdx, QWORD PTR [r9] ; A[0] * B[0] mulx r11, r10, QWORD PTR [rbp] ; A[0] * B[1] mulx r12, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx], r10 adcx r11, rax ; A[0] * B[2] mulx r13, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+8], r11 adcx r12, rax ; A[0] * B[3] mulx r14, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+16], r12 adcx r13, rax mov QWORD PTR [rbx+24], r13 ; A[0] * B[4] mulx r10, rax, QWORD PTR [rbp+32] adcx r14, rax ; A[0] * B[5] mulx r11, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+32], r14 adcx r10, rax ; A[0] * B[6] mulx r12, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+40], r10 adcx r11, rax ; A[0] * B[7] mulx r13, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+48], r11 adcx r12, rax mov QWORD PTR [rbx+56], r12 ; A[0] * B[8] mulx r14, rax, QWORD PTR [rbp+64] adcx r13, rax ; A[0] * B[9] mulx r10, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+64], r13 adcx r14, rax ; A[0] * B[10] mulx r11, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+72], r14 adcx r10, rax ; A[0] * B[11] mulx r12, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+80], r10 adcx r11, rax mov QWORD PTR [rbx+88], r11 ; A[0] * B[12] mulx r13, rax, QWORD PTR [rbp+96] adcx r12, rax ; A[0] * B[13] 
mulx r14, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+96], r12 adcx r13, rax ; A[0] * B[14] mulx r10, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+104], r13 adcx r14, rax ; A[0] * B[15] mulx r11, rax, QWORD PTR [rbp+120] mov QWORD PTR [rbx+112], r14 adcx r10, rax adcx r11, rdi mov r15, rdi adcx r15, rdi mov QWORD PTR [rbx+120], r10 mov QWORD PTR [r8], r11 mov rdx, QWORD PTR [r9+8] mov r11, QWORD PTR [rbx+8] mov r12, QWORD PTR [rbx+16] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] ; A[1] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[1] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+8], r11 adcx r12, rax adox r13, rcx ; A[1] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+16], r12 adcx r13, rax adox r14, rcx ; A[1] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+32], r14 mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] ; A[1] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[1] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[1] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[1] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+64], r13 mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[1] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[1] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[1] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[1] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov 
r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[1] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[1] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[1] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[1] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [rbx+120], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8], r11 mov QWORD PTR [r8+8], r12 mov rdx, QWORD PTR [r9+16] mov r12, QWORD PTR [rbx+16] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] ; A[2] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; A[2] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+16], r12 adcx r13, rax adox r14, rcx ; A[2] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx ; A[2] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+40], r10 mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] ; A[2] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[2] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[2] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[2] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+72], r14 mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[2] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[2] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; 
A[2] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[2] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[2] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[2] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[2] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[2] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+8], r12 mov QWORD PTR [r8+16], r13 mov rdx, QWORD PTR [r9+24] mov r13, QWORD PTR [rbx+24] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] ; A[3] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[3] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+24], r13 adcx r14, rax adox r10, rcx ; A[3] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx ; A[3] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+48], r11 mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] ; A[3] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[3] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[3] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[3] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+80], r10 mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov 
r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[3] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[3] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[3] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[3] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[3] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[3] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[3] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[3] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+8], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+16], r13 mov QWORD PTR [r8+24], r14 mov rdx, QWORD PTR [r9+32] mov r14, QWORD PTR [rbx+32] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] ; A[4] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[4] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+32], r14 adcx r10, rax adox r11, rcx ; A[4] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[4] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+56], r12 mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] ; A[4] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[4] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[4] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR 
[rbx+72], r14 adcx r10, rax adox r11, rcx ; A[4] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+88], r11 mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[4] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[4] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[4] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[4] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[4] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[4] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[4] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[4] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+16], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+24], r14 mov QWORD PTR [r8+32], r10 mov rdx, QWORD PTR [r9+40] mov r10, QWORD PTR [rbx+40] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] ; A[5] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[5] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+40], r10 adcx r11, rax adox r12, rcx ; A[5] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[5] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+64], r13 mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[5] * 
B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[5] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[5] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[5] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[5] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[5] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[5] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[5] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[5] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[5] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[5] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[5] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+24], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+32], r10 mov QWORD PTR [r8+40], r11 mov rdx, QWORD PTR [r9+48] mov r11, QWORD PTR [rbx+48] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] ; A[6] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[6] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+48], r11 adcx r12, rax adox r13, rcx ; A[6] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[6] * B[3] mulx rcx, 
rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+72], r14 mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[6] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[6] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[6] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[6] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[6] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[6] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[6] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[6] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[6] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[6] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[6] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[6] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+32], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+40], r11 mov QWORD PTR [r8+48], r12 mov rdx, QWORD PTR [r9+56] mov r12, QWORD PTR [rbx+56] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] ; A[7] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; 
A[7] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+56], r12 adcx r13, rax adox r14, rcx ; A[7] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[7] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+80], r10 mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[7] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[7] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[7] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[7] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[7] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[7] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[7] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[7] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] ; A[7] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[7] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[7] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[7] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+40], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+48], r12 mov QWORD PTR [r8+56], r13 mov rdx, QWORD PTR 
[r9+64] mov r13, QWORD PTR [rbx+64] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] ; A[8] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[8] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+64], r13 adcx r14, rax adox r10, rcx ; A[8] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[8] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [rbx+88], r11 mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[8] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[8] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[8] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[8] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[8] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[8] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[8] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[8] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+24], r14 mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[8] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[8] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[8] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[8] * B[15] mulx rcx, 
rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+48], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+56], r13 mov QWORD PTR [r8+64], r14 mov rdx, QWORD PTR [r9+72] mov r14, QWORD PTR [rbx+72] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] ; A[9] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[9] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+72], r14 adcx r10, rax adox r11, rcx ; A[9] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[9] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [rbx+96], r12 mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[9] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[9] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[9] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[9] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[9] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[9] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[9] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[9] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [r8+32], r10 mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] ; A[9] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[9] * B[13] 
mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[9] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[9] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+56], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+64], r14 mov QWORD PTR [r8+72], r10 mov rdx, QWORD PTR [r9+80] mov r10, QWORD PTR [rbx+80] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] ; A[10] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[10] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+80], r10 adcx r11, rax adox r12, rcx ; A[10] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[10] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [rbx+104], r13 mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[10] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[10] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[10] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[10] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] ; A[10] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[10] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[10] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[10] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+32], 
r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+40], r11 mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] ; A[10] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[10] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[10] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[10] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+64], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+72], r10 mov QWORD PTR [r8+80], r11 mov rdx, QWORD PTR [r9+88] mov r11, QWORD PTR [rbx+88] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] ; A[11] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r11, rax adox r12, rcx ; A[11] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+88], r11 adcx r12, rax adox r13, rcx ; A[11] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[11] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [rbx+112], r14 mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[11] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r10, rax adox r11, rcx ; A[11] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[11] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[11] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[11] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r14, rax adox r10, rcx ; A[11] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] 
mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[11] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[11] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+48], r12 mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] ; A[11] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r13, rax adox r14, rcx ; A[11] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[11] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[11] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+72], r10 mov r12, rdi adcx r11, rax adox r12, rcx adcx r12, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+80], r11 mov QWORD PTR [r8+88], r12 mov rdx, QWORD PTR [r9+96] mov r12, QWORD PTR [rbx+96] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] ; A[12] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r12, rax adox r13, rcx ; A[12] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+96], r12 adcx r13, rax adox r14, rcx ; A[12] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[12] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [rbx+120], r10 mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] ; A[12] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r11, rax adox r12, rcx ; A[12] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx ; A[12] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[12] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+24], r14 mov 
r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] ; A[12] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r10, rax adox r11, rcx ; A[12] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[12] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[12] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+56], r13 mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] ; A[12] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r14, rax adox r10, rcx ; A[12] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[12] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx ; A[12] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+80], r11 mov r13, rdi adcx r12, rax adox r13, rcx adcx r13, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+88], r12 mov QWORD PTR [r8+96], r13 mov rdx, QWORD PTR [r9+104] mov r13, QWORD PTR [rbx+104] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] ; A[13] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r13, rax adox r14, rcx ; A[13] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+104], r13 adcx r14, rax adox r10, rcx ; A[13] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[13] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8], r11 mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] ; A[13] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r12, rax adox r13, rcx ; A[13] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx ; A[13] * 
B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[13] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [r8+32], r10 mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] ; A[13] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r11, rax adox r12, rcx ; A[13] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx ; A[13] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[13] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx mov QWORD PTR [r8+64], r14 mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] mov r13, QWORD PTR [r8+96] ; A[13] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r10, rax adox r11, rcx ; A[13] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx ; A[13] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+80], r11 adcx r12, rax adox r13, rcx ; A[13] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+88], r12 mov r14, rdi adcx r13, rax adox r14, rcx adcx r14, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+96], r13 mov QWORD PTR [r8+104], r14 mov rdx, QWORD PTR [r9+112] mov r14, QWORD PTR [rbx+112] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] ; A[14] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r14, rax adox r10, rcx ; A[14] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+112], r14 adcx r10, rax adox r11, rcx ; A[14] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[14] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [r8], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+8], r12 mov r14, QWORD PTR [r8+24] mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR 
[r8+40] mov r12, QWORD PTR [r8+48] ; A[14] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r13, rax adox r14, rcx ; A[14] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+16], r13 adcx r14, rax adox r10, rcx ; A[14] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[14] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+40], r11 mov r13, QWORD PTR [r8+56] mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] ; A[14] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r12, rax adox r13, rcx ; A[14] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+48], r12 adcx r13, rax adox r14, rcx ; A[14] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[14] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx mov QWORD PTR [r8+72], r10 mov r12, QWORD PTR [r8+88] mov r13, QWORD PTR [r8+96] mov r14, QWORD PTR [r8+104] ; A[14] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r11, rax adox r12, rcx ; A[14] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+80], r11 adcx r12, rax adox r13, rcx ; A[14] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+88], r12 adcx r13, rax adox r14, rcx ; A[14] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+96], r13 mov r10, rdi adcx r14, rax adox r10, rcx adcx r10, r15 mov r15, rdi adox r15, rdi adcx r15, rdi mov QWORD PTR [r8+104], r14 mov QWORD PTR [r8+112], r10 mov rdx, QWORD PTR [r9+120] mov r10, QWORD PTR [rbx+120] mov r11, QWORD PTR [r8] mov r12, QWORD PTR [r8+8] mov r13, QWORD PTR [r8+16] mov r14, QWORD PTR [r8+24] ; A[15] * B[0] mulx rcx, rax, QWORD PTR [rbp] adcx r10, rax adox r11, rcx ; A[15] * B[1] mulx rcx, rax, QWORD PTR [rbp+8] mov QWORD PTR [rbx+120], r10 adcx r11, rax adox r12, rcx ; A[15] * B[2] mulx rcx, rax, QWORD PTR [rbp+16] mov QWORD PTR [r8], r11 adcx 
r12, rax adox r13, rcx ; A[15] * B[3] mulx rcx, rax, QWORD PTR [rbp+24] mov QWORD PTR [r8+8], r12 adcx r13, rax adox r14, rcx mov QWORD PTR [r8+16], r13 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] mov r12, QWORD PTR [r8+48] mov r13, QWORD PTR [r8+56] ; A[15] * B[4] mulx rcx, rax, QWORD PTR [rbp+32] adcx r14, rax adox r10, rcx ; A[15] * B[5] mulx rcx, rax, QWORD PTR [rbp+40] mov QWORD PTR [r8+24], r14 adcx r10, rax adox r11, rcx ; A[15] * B[6] mulx rcx, rax, QWORD PTR [rbp+48] mov QWORD PTR [r8+32], r10 adcx r11, rax adox r12, rcx ; A[15] * B[7] mulx rcx, rax, QWORD PTR [rbp+56] mov QWORD PTR [r8+40], r11 adcx r12, rax adox r13, rcx mov QWORD PTR [r8+48], r12 mov r14, QWORD PTR [r8+64] mov r10, QWORD PTR [r8+72] mov r11, QWORD PTR [r8+80] mov r12, QWORD PTR [r8+88] ; A[15] * B[8] mulx rcx, rax, QWORD PTR [rbp+64] adcx r13, rax adox r14, rcx ; A[15] * B[9] mulx rcx, rax, QWORD PTR [rbp+72] mov QWORD PTR [r8+56], r13 adcx r14, rax adox r10, rcx ; A[15] * B[10] mulx rcx, rax, QWORD PTR [rbp+80] mov QWORD PTR [r8+64], r14 adcx r10, rax adox r11, rcx ; A[15] * B[11] mulx rcx, rax, QWORD PTR [rbp+88] mov QWORD PTR [r8+72], r10 adcx r11, rax adox r12, rcx mov QWORD PTR [r8+80], r11 mov r13, QWORD PTR [r8+96] mov r14, QWORD PTR [r8+104] mov r10, QWORD PTR [r8+112] ; A[15] * B[12] mulx rcx, rax, QWORD PTR [rbp+96] adcx r12, rax adox r13, rcx ; A[15] * B[13] mulx rcx, rax, QWORD PTR [rbp+104] mov QWORD PTR [r8+88], r12 adcx r13, rax adox r14, rcx ; A[15] * B[14] mulx rcx, rax, QWORD PTR [rbp+112] mov QWORD PTR [r8+96], r13 adcx r14, rax adox r10, rcx ; A[15] * B[15] mulx rcx, rax, QWORD PTR [rbp+120] mov QWORD PTR [r8+104], r14 mov r11, rdi adcx r10, rax adox r11, rcx adcx r11, r15 mov QWORD PTR [r8+112], r10 mov QWORD PTR [r8+120], r11 sub r8, 128 cmp r9, r8 je L_start_1024_mul_avx2_16 cmp rbp, r8 jne L_end_1024_mul_avx2_16 L_start_1024_mul_avx2_16: vmovdqu xmm0, OWORD PTR [rbx] vmovups OWORD PTR [r8], xmm0 vmovdqu xmm0, OWORD PTR [rbx+16] vmovups OWORD PTR 
[r8+16], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+32]
        vmovups OWORD PTR [r8+32], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+48]
        vmovups OWORD PTR [r8+48], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+64]
        vmovups OWORD PTR [r8+64], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+80]
        vmovups OWORD PTR [r8+80], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+96]
        vmovups OWORD PTR [r8+96], xmm0
        vmovdqu xmm0, OWORD PTR [rbx+112]
        vmovups OWORD PTR [r8+112], xmm0
L_end_1024_mul_avx2_16:
        add rsp, 128
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        pop rbx
        ret
sp_1024_mul_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  */
;
; Arguments arrive in rcx (r) and rdx (a). a is read as 16 64-bit words
; ([r9] .. [r9+120]); r is written as 32 words.
;
; Register use, as set up in the prologue below:
;   r9   = a
;   r8   = r + 128, i.e. result words 16..31
;   rbp  = where result words 0..11 are built: r itself, or the 128-byte
;          stack buffer when a == r (copied back to r at the end)
;   r15, rdi, rsi, rbx = result words 12..15, held in registers until the
;          final stores to [r8-32] .. [r8-8]
;   r13  = 0
;   r14  = carry handed from the end of one diagonal to the next
;   rdx  = multiplier for mulx; rax/rcx = low/high product halves
;
; Phases: "Diagonal 1..8" accumulate the off-diagonal products A[i] x A[j]
; (i != j) once each, using adcx (CF chain) for low halves and adox (OF
; chain) for high halves. The last phase doubles every word (adox x, x)
; and adds the squares A[i] x A[i] (adcx).
;
; NOTE(review): non-volatile registers are pushed and rsp is adjusted under
; a plain PROC (no FRAME / unwind directives) - confirm this is acceptable
; for the Win64 targets this file is built for.
_text SEGMENT READONLY PARA
sp_1024_sqr_avx2_16 PROC
        push rbp
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        mov r8, rcx
        mov r9, rdx
        sub rsp, 128
        ; rbp = r when a != r, otherwise the stack buffer
        cmp r9, r8
        mov rbp, rsp
        cmovne rbp, r8
        add r8, 128
        ; r13 = 0 for the rest of the routine (xor also clears CF and OF)
        xor r13, r13
        ; Diagonal 1
        ; A[1] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx r11, r10, QWORD PTR [r9+8]
        ; A[2] x A[0]
        mulx r12, rax, QWORD PTR [r9+16]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [rbp+8], r10
        mov QWORD PTR [rbp+16], r11
        ; A[3] x A[0]
        mulx r10, rax, QWORD PTR [r9+24]
        adcx r12, rax
        adox r10, r13
        ; A[4] x A[0]
        mulx r11, rax, QWORD PTR [r9+32]
        adcx r10, rax
        adox r11, r13
        mov QWORD PTR [rbp+24], r12
        mov QWORD PTR [rbp+32], r10
        ; A[5] x A[0]
        mulx r12, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r12, r13
        ; A[6] x A[0]
        mulx r10, rax, QWORD PTR [r9+48]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [rbp+40], r11
        mov QWORD PTR [rbp+48], r12
        ; A[7] x A[0]
        mulx r11, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, r13
        ; A[8] x A[0]
        mulx r12, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [rbp+56], r10
        mov QWORD PTR [rbp+64], r11
        ; A[9] x A[0]
        mulx r10, rax, QWORD PTR [r9+72]
        adcx r12, rax
        adox r10, r13
        ; A[10] x A[0]
        mulx r11, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, r13
        mov QWORD PTR [rbp+72], r12
        mov QWORD PTR [rbp+80], r10
        ; From here words 12..15 stay in r15, rdi, rsi, rbx
        ; A[11] x A[0]
        mulx r15, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r15, r13
        ; A[12] x A[0]
        mulx rdi, rax, QWORD PTR [r9+96]
        adcx r15, rax
        adox rdi, r13
        mov QWORD PTR [rbp+88], r11
        ; A[13] x A[0]
        mulx rsi, rax, QWORD PTR [r9+104]
        adcx rdi, rax
        adox rsi, r13
        ; A[14] x A[0]
        mulx rbx, rax, QWORD PTR [r9+112]
        adcx rsi, rax
        adox rbx, r13
        ; A[15] x A[0]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx rbx, rax
        adox r10, r13
        ; Carry
        adcx r10, r13
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8], r10
        ; Diagonal 2
        mov r10, QWORD PTR [rbp+24]
        mov r11, QWORD PTR [rbp+32]
        mov r12, QWORD PTR [rbp+40]
        ; A[2] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, QWORD PTR [r9+16]
        adcx r10, rax
        adox r11, rcx
        ; A[3] x A[1]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+24], r10
        mov QWORD PTR [rbp+32], r11
        mov r10, QWORD PTR [rbp+48]
        mov r11, QWORD PTR [rbp+56]
        ; A[4] x A[1]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r12, rax
        adox r10, rcx
        ; A[5] x A[1]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+40], r12
        mov QWORD PTR [rbp+48], r10
        mov r12, QWORD PTR [rbp+64]
        mov r10, QWORD PTR [rbp+72]
        ; A[6] x A[1]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r12, rcx
        ; A[7] x A[1]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbp+56], r11
        mov QWORD PTR [rbp+64], r12
        mov r11, QWORD PTR [rbp+80]
        mov r12, QWORD PTR [rbp+88]
        ; A[8] x A[1]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r10, rax
        adox r11, rcx
        ; A[9] x A[1]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+72], r10
        mov QWORD PTR [rbp+80], r11
        ; A[10] x A[1]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r12, rax
        adox r15, rcx
        ; A[11] x A[1]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r12
        ; A[12] x A[1]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rdi, rax
        adox rsi, rcx
        ; A[13] x A[1]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx rsi, rax
        adox rbx, rcx
        mov r11, QWORD PTR [r8]
        ; A[14] x A[1]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx rbx, rax
        adox r11, rcx
        ; A[15] x A[1]
        mulx r12, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8], r11
        ; A[15] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+8], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+16], r10
        ; Diagonal 3
        ; (rdx still holds A[2], loaded for A[15] x A[2] above)
        mov r10, QWORD PTR [rbp+40]
        mov r11, QWORD PTR [rbp+48]
        mov r12, QWORD PTR [rbp+56]
        ; A[3] x A[2]
        mulx rcx, rax, QWORD PTR [r9+24]
        adcx r10, rax
        adox r11, rcx
        ; A[4] x A[2]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+40], r10
        mov QWORD PTR [rbp+48], r11
        mov r10, QWORD PTR [rbp+64]
        mov r11, QWORD PTR [rbp+72]
        ; A[5] x A[2]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r12, rax
        adox r10, rcx
        ; A[6] x A[2]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+56], r12
        mov QWORD PTR [rbp+64], r10
        mov r12, QWORD PTR [rbp+80]
        mov r10, QWORD PTR [rbp+88]
        ; A[7] x A[2]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r11, rax
        adox r12, rcx
        ; A[8] x A[2]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [rbp+72], r11
        mov QWORD PTR [rbp+80], r12
        ; A[9] x A[2]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r10, rax
        adox r15, rcx
        ; A[10] x A[2]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r10
        ; A[11] x A[2]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rdi, rax
        adox rsi, rcx
        ; A[12] x A[2]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rsi, rax
        adox rbx, rcx
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        ; A[13] x A[2]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx rbx, rax
        adox r12, rcx
        ; A[14] x A[2]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8], r12
        mov r11, QWORD PTR [r8+16]
        ; A[14] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx r12, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+8], r10
        mov QWORD PTR [r8+16], r11
        ; A[14] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx r10, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+24], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+32], r10
        ; Diagonal 4
        mov r10, QWORD PTR [rbp+56]
        mov r11, QWORD PTR [rbp+64]
        mov r12, QWORD PTR [rbp+72]
        ; A[4] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+32]
        adcx r10, rax
        adox r11, rcx
        ; A[5] x A[3]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+56], r10
        mov QWORD PTR [rbp+64], r11
        mov r10, QWORD PTR [rbp+80]
        mov r11, QWORD PTR [rbp+88]
        ; A[6] x A[3]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r12, rax
        adox r10, rcx
        ; A[7] x A[3]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [rbp+72], r12
        mov QWORD PTR [rbp+80], r10
        ; A[8] x A[3]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r11, rax
        adox r15, rcx
        ; A[9] x A[3]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r11
        ; A[10] x A[3]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rdi, rax
        adox rsi, rcx
        ; A[11] x A[3]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rsi, rax
        adox rbx, rcx
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[12] x A[3]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx rbx, rax
        adox r10, rcx
        ; A[13] x A[3]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[13] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, rcx
        ; A[13] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        ; A[13] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        ; A[13] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx r12, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        ; A[13] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx r10, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+40], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+48], r10
        ; Diagonal 5
        mov r10, QWORD PTR [rbp+72]
        mov r11, QWORD PTR [rbp+80]
        mov r12, QWORD PTR [rbp+88]
        ; A[5] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+40]
        adcx r10, rax
        adox r11, rcx
        ; A[6] x A[4]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [rbp+72], r10
        mov QWORD PTR [rbp+80], r11
        ; A[7] x A[4]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r12, rax
        adox r15, rcx
        ; A[8] x A[4]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r12
        ; A[9] x A[4]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rdi, rax
        adox rsi, rcx
        ; A[10] x A[4]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rsi, rax
        adox rbx, rcx
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[11] x A[4]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx rbx, rax
        adox r11, rcx
        ; A[12] x A[4]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[12] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, rcx
        ; A[12] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+8], r12
        mov QWORD PTR [r8+16], r10
        mov r12, QWORD PTR [r8+32]
        mov r10, QWORD PTR [r8+40]
        ; A[12] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, rcx
        ; A[12] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+24], r11
        mov QWORD PTR [r8+32], r12
        mov r11, QWORD PTR [r8+48]
        ; A[12] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+96]
        adcx r10, rax
        adox r11, rcx
        ; A[12] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx r12, rax, QWORD PTR [r9+96]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+40], r10
        mov QWORD PTR [r8+48], r11
        ; A[12] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx r10, rax, QWORD PTR [r9+96]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+56], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+64], r10
        ; Diagonal 6
        mov r10, QWORD PTR [rbp+88]
        ; A[6] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+48]
        adcx r10, rax
        adox r15, rcx
        ; A[7] x A[5]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx r15, rax
        adox rdi, rcx
        mov QWORD PTR [rbp+88], r10
        ; A[8] x A[5]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rdi, rax
        adox rsi, rcx
        ; A[9] x A[5]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rsi, rax
        adox rbx, rcx
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [r8+8]
        ; A[10] x A[5]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx rbx, rax
        adox r12, rcx
        ; A[11] x A[5]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8], r12
        mov r11, QWORD PTR [r8+16]
        mov r12, QWORD PTR [r8+24]
        ; A[11] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r10, rax
        adox r11, rcx
        ; A[11] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+8], r10
        mov QWORD PTR [r8+16], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        ; A[11] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r12, rax
        adox r10, rcx
        ; A[11] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+24], r12
        mov QWORD PTR [r8+32], r10
        mov r12, QWORD PTR [r8+48]
        mov r10, QWORD PTR [r8+56]
        ; A[11] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+88]
        adcx r11, rax
        adox r12, rcx
        ; A[13] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+40], r11
        mov QWORD PTR [r8+48], r12
        mov r11, QWORD PTR [r8+64]
        ; A[13] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+104]
        adcx r10, rax
        adox r11, rcx
        ; A[13] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx r12, rax, QWORD PTR [r9+104]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+56], r10
        mov QWORD PTR [r8+64], r11
        ; A[13] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx r10, rax, QWORD PTR [r9+104]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+72], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+80], r10
        ; Diagonal 7
        ; A[7] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+56]
        adcx rdi, rax
        adox rsi, rcx
        ; A[8] x A[6]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rsi, rax
        adox rbx, rcx
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[9] x A[6]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx rbx, rax
        adox r10, rcx
        ; A[10] x A[6]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8], r10
        mov r12, QWORD PTR [r8+16]
        mov r10, QWORD PTR [r8+24]
        ; A[10] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r11, rax
        adox r12, rcx
        ; A[10] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+8], r11
        mov QWORD PTR [r8+16], r12
        mov r11, QWORD PTR [r8+32]
        mov r12, QWORD PTR [r8+40]
        ; A[10] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+80]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+24], r10
        mov QWORD PTR [r8+32], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[14] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        ; A[14] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+40], r12
        mov QWORD PTR [r8+48], r10
        mov r12, QWORD PTR [r8+64]
        mov r10, QWORD PTR [r8+72]
        ; A[14] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, rcx
        ; A[14] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+56], r11
        mov QWORD PTR [r8+64], r12
        mov r11, QWORD PTR [r8+80]
        ; A[14] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, QWORD PTR [r9+112]
        adcx r10, rax
        adox r11, rcx
        ; A[14] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx r12, rax, QWORD PTR [r9+112]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+72], r10
        mov QWORD PTR [r8+80], r11
        ; A[14] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx r10, rax, QWORD PTR [r9+112]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+88], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+96], r10
        ; Diagonal 8
        mov r11, QWORD PTR [r8]
        mov r12, QWORD PTR [r8+8]
        ; A[8] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+64]
        adcx rbx, rax
        adox r11, rcx
        ; A[9] x A[7]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[9] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+72]
        adcx r12, rax
        adox r10, rcx
        ; A[15] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+8], r12
        mov QWORD PTR [r8+16], r10
        mov r12, QWORD PTR [r8+32]
        mov r10, QWORD PTR [r8+40]
        ; A[15] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        ; A[15] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+24], r11
        mov QWORD PTR [r8+32], r12
        mov r11, QWORD PTR [r8+48]
        mov r12, QWORD PTR [r8+56]
        ; A[15] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        ; A[15] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        mov QWORD PTR [r8+40], r10
        mov QWORD PTR [r8+48], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        ; A[15] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        ; A[15] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        mov QWORD PTR [r8+56], r12
        mov QWORD PTR [r8+64], r10
        mov r12, QWORD PTR [r8+80]
        mov r10, QWORD PTR [r8+88]
        ; A[15] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, rcx
        ; A[15] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, rcx
        mov QWORD PTR [r8+72], r11
        mov QWORD PTR [r8+80], r12
        mov r11, QWORD PTR [r8+96]
        ; A[15] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx rcx, rax, QWORD PTR [r9+120]
        adcx r10, rax
        adox r11, rcx
        ; A[15] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx r12, rax, QWORD PTR [r9+120]
        adcx r11, rax
        adox r12, r13
        mov QWORD PTR [r8+88], r10
        mov QWORD PTR [r8+96], r11
        ; A[15] x A[14]
        mov rdx, QWORD PTR [r9+112]
        mulx r10, rax, QWORD PTR [r9+120]
        adcx r12, rax
        adox r10, r13
        mov QWORD PTR [r8+104], r12
        ; Carry
        adcx r10, r14
        mov r14, r13
        adcx r14, r13
        adox r14, r13
        mov QWORD PTR [r8+112], r10
        mov QWORD PTR [r8+120], r14
        ; Double and Add in A[i] x A[i]
        ; Each word is doubled with adox x, x (OF chain) and the square's
        ; low/high halves are added with adcx (CF chain).
        mov r11, QWORD PTR [rbp+8]
        ; A[0] x A[0]
        mov rdx, QWORD PTR [r9]
        mulx rcx, rax, rdx
        mov QWORD PTR [rbp], rax
        adox r11, r11
        adcx r11, rcx
        mov QWORD PTR [rbp+8], r11
        mov r10, QWORD PTR [rbp+16]
        mov r11, QWORD PTR [rbp+24]
        ; A[1] x A[1]
        mov rdx, QWORD PTR [r9+8]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+16], r10
        mov QWORD PTR [rbp+24], r11
        mov r10, QWORD PTR [rbp+32]
        mov r11, QWORD PTR [rbp+40]
        ; A[2] x A[2]
        mov rdx, QWORD PTR [r9+16]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+32], r10
        mov QWORD PTR [rbp+40], r11
        mov r10, QWORD PTR [rbp+48]
        mov r11, QWORD PTR [rbp+56]
        ; A[3] x A[3]
        mov rdx, QWORD PTR [r9+24]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+48], r10
        mov QWORD PTR [rbp+56], r11
        mov r10, QWORD PTR [rbp+64]
        mov r11, QWORD PTR [rbp+72]
        ; A[4] x A[4]
        mov rdx, QWORD PTR [r9+32]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+64], r10
        mov QWORD PTR [rbp+72], r11
        mov r10, QWORD PTR [rbp+80]
        mov r11, QWORD PTR [rbp+88]
        ; A[5] x A[5]
        mov rdx, QWORD PTR [r9+40]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [rbp+80], r10
        mov QWORD PTR [rbp+88], r11
        ; Words 12..15 are still in r15, rdi, rsi, rbx
        ; A[6] x A[6]
        mov rdx, QWORD PTR [r9+48]
        mulx rcx, rax, rdx
        adox r15, r15
        adox rdi, rdi
        adcx r15, rax
        adcx rdi, rcx
        ; A[7] x A[7]
        mov rdx, QWORD PTR [r9+56]
        mulx rcx, rax, rdx
        adox rsi, rsi
        adox rbx, rbx
        adcx rsi, rax
        adcx rbx, rcx
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        ; A[8] x A[8]
        mov rdx, QWORD PTR [r9+64]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8], r10
        mov QWORD PTR [r8+8], r11
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        ; A[9] x A[9]
        mov rdx, QWORD PTR [r9+72]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+16], r10
        mov QWORD PTR [r8+24], r11
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        ; A[10] x A[10]
        mov rdx, QWORD PTR [r9+80]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+32], r10
        mov QWORD PTR [r8+40], r11
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        ; A[11] x A[11]
        mov rdx, QWORD PTR [r9+88]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+48], r10
        mov QWORD PTR [r8+56], r11
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        ; A[12] x A[12]
        mov rdx, QWORD PTR [r9+96]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+64], r10
        mov QWORD PTR [r8+72], r11
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        ; A[13] x A[13]
        mov rdx, QWORD PTR [r9+104]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+80], r10
        mov QWORD PTR [r8+88], r11
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        ; A[14] x A[14]
        mov rdx, QWORD PTR [r9+112]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+96], r10
        mov QWORD PTR [r8+104], r11
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        ; A[15] x A[15]
        mov rdx, QWORD PTR [r9+120]
        mulx rcx, rax, rdx
        adox r10, r10
        adox r11, r11
        adcx r10, rax
        adcx r11, rcx
        mov QWORD PTR [r8+112], r10
        mov QWORD PTR [r8+120], r11
        ; Store result words 12..15 (r8 is still r + 128 here)
        mov QWORD PTR [r8+-32], r15
        mov QWORD PTR [r8+-24], rdi
        mov QWORD PTR [r8+-16], rsi
        mov QWORD PTR [r8+-8], rbx
        ; r8 = r again; when a == r the low words were built on the stack
        ; and are copied into r now, otherwise they are already in place.
        sub r8, 128
        cmp r9, r8
        jne L_end_1024_sqr_avx2_16
        vmovdqu xmm0, OWORD PTR [rbp]
        vmovups OWORD PTR [r8], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+16]
        vmovups OWORD PTR [r8+16], xmm0
        ; Remainder of the sp_1024_sqr_avx2_16 copy-back: move the rest of
        ; the low result words from the buffer at rbp into r (r8).
        vmovdqu xmm0, OWORD PTR [rbp+32]
        vmovups OWORD PTR [r8+32], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+48]
        vmovups OWORD PTR [r8+48], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+64]
        vmovups OWORD PTR [r8+64], xmm0
        vmovdqu xmm0, OWORD PTR [rbp+80]
        vmovups OWORD PTR [r8+80], xmm0
L_end_1024_sqr_avx2_16:
        ; Release the 128-byte buffer and restore the registers pushed at
        ; entry, in reverse order.
        add rsp, 128
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        ret
sp_1024_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision integer.
;  */
;
; Arguments: rcx = r, rdx = a, r8 = b; 16 64-bit words each.
; Returns:   rax = carry out of the top word (0 or 1).
; Scratch:   r9, r10 (alternating), flags.
;
; One add followed by fifteen adc instructions form a single carry chain.
; Only mov instructions sit between them, and mov leaves the flags alone,
; so the carry survives from word to word. Each step loads the next word of
; a before storing the previous result word.
_text SEGMENT READONLY PARA
sp_1024_add_16 PROC
        ; Add
        mov r9, QWORD PTR [rdx]
        ; rax = 0 for the carry result; done before the add so that the
        ; flags xor writes are not part of the carry chain
        xor rax, rax
        add r9, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r9
        adc r10, QWORD PTR [r8+8]
        mov r9, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r10
        adc r9, QWORD PTR [r8+16]
        mov r10, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r9
        adc r10, QWORD PTR [r8+24]
        mov r9, QWORD PTR [rdx+32]
        mov QWORD PTR [rcx+24], r10
        adc r9, QWORD PTR [r8+32]
        mov r10, QWORD PTR [rdx+40]
        mov QWORD PTR [rcx+32], r9
        adc r10, QWORD PTR [r8+40]
        mov r9, QWORD PTR [rdx+48]
        mov QWORD PTR [rcx+40], r10
        adc r9, QWORD PTR [r8+48]
        mov r10, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+48], r9
        adc r10, QWORD PTR [r8+56]
        mov r9, QWORD PTR [rdx+64]
        mov QWORD PTR [rcx+56], r10
        adc r9, QWORD PTR [r8+64]
        mov r10, QWORD PTR [rdx+72]
        mov QWORD PTR [rcx+64], r9
        adc r10, QWORD PTR [r8+72]
        mov r9, QWORD PTR [rdx+80]
        mov QWORD PTR [rcx+72], r10
        adc r9, QWORD PTR [r8+80]
        mov r10, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+80], r9
        adc r10, QWORD PTR [r8+88]
        mov r9, QWORD PTR [rdx+96]
        mov QWORD PTR [rcx+88], r10
        adc r9, QWORD PTR [r8+96]
        mov r10, QWORD PTR [rdx+104]
        mov QWORD PTR [rcx+96], r9
        adc r10, QWORD PTR [r8+104]
        mov r9, QWORD PTR [rdx+112]
        mov QWORD PTR [rcx+104], r10
        adc r9, QWORD PTR [r8+112]
        mov r10, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+112], r9
        adc r10, QWORD PTR [r8+120]
        mov QWORD PTR [rcx+120], r10
        ; rax = 0 + final carry
        adc rax, 0
        ret
sp_1024_add_16 ENDP
_text ENDS
; /* Sub b from a into a.
;  * (a -= b)
;  *
;  * a  A single precision integer and result.
;  * b  A single precision integer.
;  */
;
; Arguments: rcx = a (read and overwritten), rdx = b; 16 64-bit words each.
; Returns:   rax = 0 when there is no borrow out of the top word,
;            all ones (-1) when there is.
; Scratch:   r8, r9 (alternating), flags.
;
; One sub followed by fifteen sbb instructions form a single borrow chain.
; Only mov instructions sit between them, so the borrow in CF is preserved
; from word to word.
_text SEGMENT READONLY PARA
sp_1024_sub_in_place_16 PROC
        mov r8, QWORD PTR [rcx]
        sub r8, QWORD PTR [rdx]
        mov r9, QWORD PTR [rcx+8]
        mov QWORD PTR [rcx], r8
        sbb r9, QWORD PTR [rdx+8]
        mov r8, QWORD PTR [rcx+16]
        mov QWORD PTR [rcx+8], r9
        sbb r8, QWORD PTR [rdx+16]
        mov r9, QWORD PTR [rcx+24]
        mov QWORD PTR [rcx+16], r8
        sbb r9, QWORD PTR [rdx+24]
        mov r8, QWORD PTR [rcx+32]
        mov QWORD PTR [rcx+24], r9
        sbb r8, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rcx+40]
        mov QWORD PTR [rcx+32], r8
        sbb r9, QWORD PTR [rdx+40]
        mov r8, QWORD PTR [rcx+48]
        mov QWORD PTR [rcx+40], r9
        sbb r8, QWORD PTR [rdx+48]
        mov r9, QWORD PTR [rcx+56]
        mov QWORD PTR [rcx+48], r8
        sbb r9, QWORD PTR [rdx+56]
        mov r8, QWORD PTR [rcx+64]
        mov QWORD PTR [rcx+56], r9
        sbb r8, QWORD PTR [rdx+64]
        mov r9, QWORD PTR [rcx+72]
        mov QWORD PTR [rcx+64], r8
        sbb r9, QWORD PTR [rdx+72]
        mov r8, QWORD PTR [rcx+80]
        mov QWORD PTR [rcx+72], r9
        sbb r8, QWORD PTR [rdx+80]
        mov r9, QWORD PTR [rcx+88]
        mov QWORD PTR [rcx+80], r8
        sbb r9, QWORD PTR [rdx+88]
        mov r8, QWORD PTR [rcx+96]
        mov QWORD PTR [rcx+88], r9
        sbb r8, QWORD PTR [rdx+96]
        mov r9, QWORD PTR [rcx+104]
        mov QWORD PTR [rcx+96], r8
        sbb r9, QWORD PTR [rdx+104]
        mov r8, QWORD PTR [rcx+112]
        mov QWORD PTR [rcx+104], r9
        sbb r8, QWORD PTR [rdx+112]
        mov r9, QWORD PTR [rcx+120]
        mov QWORD PTR [rcx+112], r8
        sbb r9, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+120], r9
        ; rax = rax - rax - CF: 0 without a final borrow, -1 with one
        sbb rax, rax
        ret
sp_1024_sub_in_place_16 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
;  * m is -1 to subtract and 0 when not subtracting.
;  *
;  * r  A single precision number representing condition subtract result.
;  * a  A single precision number to subtract from.
;  * b  A single precision number to subtract.
;  * m  Mask value to apply.
; */
; Register use, as read by the code below:
;   rcx = r, rdx = a, r8 = b, r9 = m.
; Returns rax = 0 when the subtraction did not borrow, -1 when it did.
; Scratch: r10, r11, and r8 (reused as a temporary once b has been masked).
; 128 bytes of stack hold the masked copy of b.
_text SEGMENT READONLY PARA
sp_1024_cond_sub_16 PROC
        sub     rsp, 128
        ; Stage 1: tmp[i] = b[i] AND m for all 16 words, two per expansion.
        ; Done up front because AND would destroy the borrow flag if it were
        ; interleaved with the SBB chain of stage 2.
sp1024_k = 0
REPEAT 8
        mov     r10, QWORD PTR [r8+sp1024_k]
        mov     r11, QWORD PTR [r8+sp1024_k+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp1024_k], r10
        mov     QWORD PTR [rsp+sp1024_k+8], r11
sp1024_k = sp1024_k + 16
ENDM
        ; Stage 2: r = a - tmp. Word 0 opens the borrow chain with SUB.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
        ; Words 1..14, two per expansion. r11 and r10 alternate and each
        ; result is stored one step late. Only MOV and SBB are emitted.
sp1024_k = 8
REPEAT 7
        mov     r11, QWORD PTR [rdx+sp1024_k]
        mov     r8, QWORD PTR [rsp+sp1024_k]
        sbb     r11, r8
        mov     QWORD PTR [rcx+sp1024_k-8], r10
        mov     r10, QWORD PTR [rdx+sp1024_k+8]
        mov     r8, QWORD PTR [rsp+sp1024_k+8]
        sbb     r10, r8
        mov     QWORD PTR [rcx+sp1024_k], r11
sp1024_k = sp1024_k + 16
ENDM
        ; Word 15, then flush the last two differences.
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rsp+120]
        sbb     r11, r8
        mov     QWORD PTR [rcx+112], r10
        mov     QWORD PTR [rcx+120], r11
        ; Capture the borrow in rax before ADD rsp overwrites the flags.
        sbb     rax, rax
        add     rsp, 128
        ret
sp_1024_cond_sub_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to subtract nothing.
; *
; * r A single precision number representing condition subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply.
; */
; Register use, as read by the code below:
;   rcx = r, rdx = a, r8 = b, r9 = m.
; Returns rax = 0 when the subtraction did not borrow, -1 when it did.
; Scratch: r10, r11; r12 is saved and restored.
;
; b is masked with PEXT instead of AND. PEXT gives the same result as AND
; only for m = 0 (result 0) and m = -1 (source unchanged), the two values
; the header documents. PEXT is used so the masking can sit inside the
; borrow chain (per the Intel SDM it does not modify the flags).
_text SEGMENT READONLY PARA
sp_1024_cond_sub_avx2_16 PROC
        push    r12
        ; Word 0 opens the borrow chain with SUB.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..15, three per expansion. r10, r11 and r12 rotate through
        ; the roles "masked b word", "a word / difference" and "previous
        ; difference waiting to be stored".
sp1024_k = 8
REPEAT 5
        mov     r12, QWORD PTR [r8+sp1024_k]
        mov     r11, QWORD PTR [rdx+sp1024_k]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp1024_k-8], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+sp1024_k+8]
        mov     r12, QWORD PTR [rdx+sp1024_k+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp1024_k], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+sp1024_k+16]
        mov     r10, QWORD PTR [rdx+sp1024_k+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp1024_k+8], r12
        sbb     r10, r11
sp1024_k = sp1024_k + 24
ENDM
        ; Store word 15 and turn the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+120], r10
        sbb     rax, rax
        pop     r12
        ret
sp_1024_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r.
; (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision digit.
; */
; Register use, as read by the code below:
;   rcx = r (17 words are written, [rcx] .. [rcx+128]),
;   rdx = a (16 words; moved to r9 because MUL overwrites rdx),
;   r8  = b.
; Scratch: rax, rdx, r9, r10, r11; r12 is saved and restored.

; One inner word of the product: r[woff/8] = low(a[woff/8] * b) + rLo.
; The high half and the carry are accumulated into rHi. rNext is cleared
; first and then collects the carry out of rHi.
SP1024_MULD_WORD MACRO woff, rLo, rHi, rNext
        mov     rax, r8
        xor     rNext, rNext
        mul     QWORD PTR [r9+woff]
        add     rLo, rax
        mov     QWORD PTR [rcx+woff], rLo
        adc     rHi, rdx
        adc     rNext, 0
ENDM

_text SEGMENT READONLY PARA
sp_1024_mul_d_16 PROC
        push    r12
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[14] * B: r11, r12 and r10 rotate through the roles
        ; (current low accumulator, next accumulator, the one after that).
        SP1024_MULD_WORD 8,   r11, r12, r10
        SP1024_MULD_WORD 16,  r12, r10, r11
        SP1024_MULD_WORD 24,  r10, r11, r12
        SP1024_MULD_WORD 32,  r11, r12, r10
        SP1024_MULD_WORD 40,  r12, r10, r11
        SP1024_MULD_WORD 48,  r10, r11, r12
        SP1024_MULD_WORD 56,  r11, r12, r10
        SP1024_MULD_WORD 64,  r12, r10, r11
        SP1024_MULD_WORD 72,  r10, r11, r12
        SP1024_MULD_WORD 80,  r11, r12, r10
        SP1024_MULD_WORD 88,  r12, r10, r11
        SP1024_MULD_WORD 96,  r10, r11, r12
        SP1024_MULD_WORD 104, r11, r12, r10
        SP1024_MULD_WORD 112, r12, r10, r11
        ; A[15] * B: the high half becomes the 17th result word.
        mov     rax, r8
        mul     QWORD PTR [r9+120]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+120], r10
        mov     QWORD PTR [rcx+128], r11
        pop     r12
        ret
sp_1024_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (17 words are written).
; * a A single precision integer.
; * b A single precision digit.
; *
; * Register use, as read by the code below: rcx = r, rdx = a (moved to rax
; * because MULX takes its implicit multiplicand from rdx), r8 = b.
; * Scratch: rax, rdx, r9, r10, r11; r12 and r13 are saved and restored.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_d_avx2_16 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B. The XOR gives r13 = 0 for the rest of the routine and
        ; clears CF and OF, the two flags used by the ADCX/ADOX chains.
        mov     rdx, r8
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[14] * B, two words per expansion. The low half of
        ; each product is added on the CF chain (ADCX), the high half on the
        ; OF chain (ADOX). r12 and r11 alternate as the word being completed.
sp1024_k = 8
REPEAT 7
        mulx    r10, r9, QWORD PTR [rax+sp1024_k]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp1024_k], r12
        mulx    r10, r9, QWORD PTR [rax+sp1024_k+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp1024_k+8], r11
sp1024_k = sp1024_k + 16
ENDM
        ; A[15] * B: the last ADCX folds the outstanding CF into the top word.
        mulx    r10, r9, QWORD PTR [rax+120]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+120], r12
        mov     QWORD PTR [rcx+128], r11
        pop     r13
        pop     r12
        ret
sp_1024_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide (rcx).
; * d0 The low order half of the number to divide (rdx).
; * div The divisor (r8).
; * returns the quotient in rax; the remainder left in rdx is not returned.
; *
; * NOTE(review): per the Intel SDM, DIV raises #DE when the quotient does not
; * fit in 64 bits (d1 >= div) or when div is 0. Nothing here checks for that;
; * presumably the caller guarantees it - confirm.
; */
_text SEGMENT READONLY PARA
div_1024_word_asm_16 PROC
        ; DIV takes the dividend in rdx:rax, so d0 goes to rax (via r9) and
        ; d1 to rdx.
        mov     r9, rdx
        mov     rax, r9
        mov     rdx, rcx
        div     r8
        ret
div_1024_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer (rcx).
; * b A single precision integer (rdx).
; * return -1, 0 or 1 in rax if a is less than, equal to or greater than b
; * respectively.
; *
; * All 16 words are always read and no branch is taken.
; * Scratch: r8, r9, r10, r11; r12 is saved and restored.
; */
_text SEGMENT READONLY PARA
sp_1024_cmp_16 PROC
        push    r12
        xor     r9, r9                  ; r9  = 0, used to clear the mask
        mov     r8, -1                  ; r8  = mask: -1 until a difference is seen
        mov     rax, -1                 ; rax = provisional result
        mov     r10, 1                  ; r10 = result for "a greater"
        ; Walk from the most significant word down. Both words are ANDed with
        ; the mask, so once a difference has cleared it every later word
        ; compares equal and no CMOV fires again.
sp1024_k = 120
REPEAT 16
        mov     r11, QWORD PTR [rcx+sp1024_k]
        mov     r12, QWORD PTR [rdx+sp1024_k]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10                ; a word above b word: result = 1
        cmovc   rax, r8                 ; a word below b word: result = mask (-1)
        cmovnz  r8, r9                  ; words differ: clear the mask
sp1024_k = sp1024_k - 8
ENDM
        ; No difference: mask is still -1 and rax is -1, giving 0.
        ; Otherwise the mask is 0 and rax keeps its 1 / -1.
        xor     rax, r8
        pop     r12
        ret
sp_1024_cmp_16 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 to copy and 0 when not.
; *
; * r A single precision number to copy over.
; * a A single precision number to copy.
; * m Mask value to apply.
; */
; Register use, as read by the code below: rcx = r, rdx = a, r8 = m.
; Scratch: rax, r9, r10, r11. Nothing is returned (rax holds a leftover
; temporary).
_text SEGMENT READONLY PARA
sp_1024_cond_copy_16 PROC
        ; Four words per expansion: r[i] ^= (r[i] ^ a[i]) & m.
        ; With m = -1 this leaves a[i] in r[i]; with m = 0 r[i] is unchanged.
        ; Both r and a are read, and r is written, for every word whatever m is.
sp1024_k = 0
REPEAT 4
        mov     rax, QWORD PTR [rcx+sp1024_k]
        mov     r9, QWORD PTR [rcx+sp1024_k+8]
        mov     r10, QWORD PTR [rcx+sp1024_k+16]
        mov     r11, QWORD PTR [rcx+sp1024_k+24]
        xor     rax, QWORD PTR [rdx+sp1024_k]
        xor     r9, QWORD PTR [rdx+sp1024_k+8]
        xor     r10, QWORD PTR [rdx+sp1024_k+16]
        xor     r11, QWORD PTR [rdx+sp1024_k+24]
        and     rax, r8
        and     r9, r8
        and     r10, r8
        and     r11, r8
        xor     QWORD PTR [rcx+sp1024_k], rax
        xor     QWORD PTR [rcx+sp1024_k+8], r9
        xor     QWORD PTR [rcx+sp1024_k+16], r10
        xor     QWORD PTR [rcx+sp1024_k+24], r11
sp1024_k = sp1024_k + 32
ENDM
        ret
sp_1024_cond_copy_16 ENDP
_text ENDS
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
; Register use, as read by the code below:
;   rcx = a (32 words; advanced by one word per loop iteration),
;   rdx = m (moved to r9 because MUL overwrites rdx),
;   r8  = mp.
; Inside the loop:
;   r10      = iterations left (starts at 16)
;   r13      = mu = a[i] * mp (low 64 bits)
;   r15, rdi = a[i] and a[i+1], kept in registers across iterations
;   r11, r12 = alternating carry words
;   r14      = scratch for a[i+j]
;   rsi      = carry out of a[i+16], carried over to the next iteration
; The final conditional subtraction is done by sp_1024_cond_sub_16, whose
; return value is left in rax.
; r12-r15, rdi and rsi are saved and restored.
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_16 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 16
        mov     r10, 16
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_1024_mont_reduce_16_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu (the sum itself is not stored, only its carry
        ; is used)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu (becomes a[i] of the next iteration, in r15)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu (becomes a[i+1] of the next iteration, in rdi)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+14] += m[j] * mu, two words per expansion.
        ; Odd j take the carry from r12 and leave theirs in r11; even j do
        ; the opposite. The store sits between ADD and ADC, which is safe
        ; because MOV leaves the flags alone.
sp1024_k = 24
REPEAT 6
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+sp1024_k]
        mov     r14, QWORD PTR [rcx+sp1024_k]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+sp1024_k], r14
        adc     r11, 0
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+sp1024_k+8]
        mov     r14, QWORD PTR [rcx+sp1024_k+8]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+sp1024_k+8], r14
        adc     r12, 0
sp1024_k = sp1024_k + 16
ENDM
        ; a[i+15] += m[15] * mu, and a[i+16] += high word + previous overflow.
        mov     rax, r13
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r12, rax
        adc     rdx, rsi
        ; Must be MOV, not XOR: the carry from the ADC above is still needed.
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     QWORD PTR [rcx+128], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_1024_mont_reduce_16_loop
        ; rcx now points at the upper 16 words. Write back the two words that
        ; were held in registers and build the subtraction mask:
        ;   rsi = -1 if the last iteration overflowed, or if the top word of
        ;         the upper half is >= the top word of m (only the top words
        ;         are compared); 0 otherwise.
        mov     r14, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx], r15
        sub     r14, QWORD PTR [r9+120]
        mov     QWORD PTR [rcx+8], rdi
        sbb     r14, r14
        neg     rsi
        not     r14
        or      rsi, r14
        ; sp_1024_cond_sub_16(r = original a, a = upper half, b = m, m = mask).
        ; r9 must be copied to r8 before it is overwritten with the mask.
        ; The former IFDEF _WIN64 / ELSE split did this in the wrong order in
        ; its ELSE arm, which could never be assembled because this file
        ; defines _WIN64 unconditionally; only the correct order is kept.
        ; The call is made without shadow space and with rsp not 16-byte
        ; aligned. sp_1024_cond_sub_16 in this file only touches the 128
        ; bytes it allocates itself, so this is harmless for that callee.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 128
        call    sp_1024_cond_sub_16
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_1024_mont_reduce_16 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; */ _text SEGMENT READONLY PARA sp_1024_mont_add_16 PROC push r12 push r13 sub rsp, 128 mov rax, QWORD PTR [rdx] mov r10, QWORD PTR [rdx+8] mov r11, QWORD PTR [rdx+16] mov r12, QWORD PTR [rdx+24] add rax, QWORD PTR [r8] mov r13, 0 adc r10, QWORD PTR [r8+8] adc r11, QWORD PTR [r8+16] adc r12, QWORD PTR [r8+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r10 mov QWORD PTR [rcx+16], r11 mov QWORD PTR [rcx+24], r12 mov rax, QWORD PTR [rdx+32] mov r10, QWORD PTR [rdx+40] mov r11, QWORD PTR [rdx+48] mov r12, QWORD PTR [rdx+56] adc rax, QWORD PTR [r8+32] adc r10, QWORD PTR [r8+40] adc r11, QWORD PTR [r8+48] adc r12, QWORD PTR [r8+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r10 mov QWORD PTR [rcx+48], r11 mov QWORD PTR [rcx+56], r12 mov rax, QWORD PTR [rdx+64] mov r10, QWORD PTR [rdx+72] mov r11, QWORD PTR [rdx+80] mov r12, QWORD PTR [rdx+88] adc rax, QWORD PTR [r8+64] adc r10, QWORD PTR [r8+72] adc r11, QWORD PTR [r8+80] adc r12, QWORD PTR [r8+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r10 mov QWORD PTR [rcx+80], r11 mov QWORD PTR [rcx+88], r12 mov rax, QWORD PTR [rdx+96] mov r10, QWORD PTR [rdx+104] mov r11, QWORD PTR [rdx+112] mov r12, QWORD PTR [rdx+120] adc rax, QWORD PTR [r8+96] adc r10, QWORD PTR [r8+104] adc r11, QWORD PTR [r8+112] adc r12, QWORD PTR [r8+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r10 mov QWORD PTR [rcx+112], r11 mov QWORD PTR [rcx+120], r12 sbb r13, 0 sub r12, QWORD PTR [r9+120] sbb r12, r12 not r12 or r13, r12 mov r11, QWORD PTR [r9] mov r12, QWORD PTR [r9+8] and r11, r13 and r12, r13 mov QWORD PTR [rsp], r11 mov QWORD PTR [rsp+8], r12 mov r11, QWORD PTR [r9+16] mov r12, QWORD PTR [r9+24] and r11, r13 and r12, r13 mov QWORD PTR [rsp+16], r11 mov QWORD PTR [rsp+24], r12 mov r11, QWORD PTR [r9+32] mov r12, QWORD PTR [r9+40] and r11, r13 and r12, r13 mov QWORD PTR [rsp+32], r11 mov QWORD PTR [rsp+40], r12 mov r11, QWORD PTR [r9+48] mov r12, QWORD PTR [r9+56] and r11, r13 and r12, r13 mov QWORD PTR 
[rsp+48], r11 mov QWORD PTR [rsp+56], r12 mov r11, QWORD PTR [r9+64] mov r12, QWORD PTR [r9+72] and r11, r13 and r12, r13 mov QWORD PTR [rsp+64], r11 mov QWORD PTR [rsp+72], r12 mov r11, QWORD PTR [r9+80] mov r12, QWORD PTR [r9+88] and r11, r13 and r12, r13 mov QWORD PTR [rsp+80], r11 mov QWORD PTR [rsp+88], r12 mov r11, QWORD PTR [r9+96] mov r12, QWORD PTR [r9+104] and r11, r13 and r12, r13 mov QWORD PTR [rsp+96], r11 mov QWORD PTR [rsp+104], r12 mov r11, QWORD PTR [r9+112] mov r12, QWORD PTR [r9+120] and r11, r13 and r12, r13 mov QWORD PTR [rsp+112], r11 mov QWORD PTR [rsp+120], r12 mov rax, QWORD PTR [rcx] mov r10, QWORD PTR [rcx+8] sub rax, QWORD PTR [rsp] sbb r10, QWORD PTR [rsp+8] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r10 mov rax, QWORD PTR [rcx+16] mov r10, QWORD PTR [rcx+24] sbb rax, QWORD PTR [rsp+16] sbb r10, QWORD PTR [rsp+24] mov QWORD PTR [rcx+16], rax mov QWORD PTR [rcx+24], r10 mov rax, QWORD PTR [rcx+32] mov r10, QWORD PTR [rcx+40] sbb rax, QWORD PTR [rsp+32] sbb r10, QWORD PTR [rsp+40] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r10 mov rax, QWORD PTR [rcx+48] mov r10, QWORD PTR [rcx+56] sbb rax, QWORD PTR [rsp+48] sbb r10, QWORD PTR [rsp+56] mov QWORD PTR [rcx+48], rax mov QWORD PTR [rcx+56], r10 mov rax, QWORD PTR [rcx+64] mov r10, QWORD PTR [rcx+72] sbb rax, QWORD PTR [rsp+64] sbb r10, QWORD PTR [rsp+72] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r10 mov rax, QWORD PTR [rcx+80] mov r10, QWORD PTR [rcx+88] sbb rax, QWORD PTR [rsp+80] sbb r10, QWORD PTR [rsp+88] mov QWORD PTR [rcx+80], rax mov QWORD PTR [rcx+88], r10 mov rax, QWORD PTR [rcx+96] mov r10, QWORD PTR [rcx+104] sbb rax, QWORD PTR [rsp+96] sbb r10, QWORD PTR [rsp+104] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r10 mov rax, QWORD PTR [rcx+112] mov r10, QWORD PTR [rcx+120] sbb rax, QWORD PTR [rsp+112] sbb r10, QWORD PTR [rsp+120] mov QWORD PTR [rcx+112], rax mov QWORD PTR [rcx+120], r10 add rsp, 128 pop r13 pop r12 ret sp_1024_mont_add_16 ENDP _text 
ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. ; * a Number to souble in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA sp_1024_mont_dbl_16 PROC push r12 sub rsp, 128 mov rax, QWORD PTR [rdx] mov r9, QWORD PTR [rdx+8] mov r10, QWORD PTR [rdx+16] mov r11, QWORD PTR [rdx+24] add rax, QWORD PTR [rdx] mov r12, 0 adc r9, QWORD PTR [rdx+8] adc r10, QWORD PTR [rdx+16] adc r11, QWORD PTR [rdx+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rdx+32] mov r9, QWORD PTR [rdx+40] mov r10, QWORD PTR [rdx+48] mov r11, QWORD PTR [rdx+56] adc rax, QWORD PTR [rdx+32] adc r9, QWORD PTR [rdx+40] adc r10, QWORD PTR [rdx+48] adc r11, QWORD PTR [rdx+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rdx+64] mov r9, QWORD PTR [rdx+72] mov r10, QWORD PTR [rdx+80] mov r11, QWORD PTR [rdx+88] adc rax, QWORD PTR [rdx+64] adc r9, QWORD PTR [rdx+72] adc r10, QWORD PTR [rdx+80] adc r11, QWORD PTR [rdx+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [rdx+96] mov r9, QWORD PTR [rdx+104] mov r10, QWORD PTR [rdx+112] mov r11, QWORD PTR [rdx+120] adc rax, QWORD PTR [rdx+96] adc r9, QWORD PTR [rdx+104] adc r10, QWORD PTR [rdx+112] adc r11, QWORD PTR [rdx+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 sbb r12, 0 sub r11, QWORD PTR [r8+120] sbb r11, r11 not r11 or r12, r11 mov r10, QWORD PTR [r8] mov r11, QWORD PTR [r8+8] and r10, r12 and r11, r12 mov QWORD PTR [rsp], r10 mov QWORD PTR [rsp+8], r11 mov r10, QWORD PTR [r8+16] mov r11, QWORD PTR [r8+24] and r10, r12 and r11, r12 mov QWORD PTR [rsp+16], r10 mov QWORD PTR [rsp+24], r11 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] and r10, r12 and r11, r12 mov 
QWORD PTR [rsp+32], r10 mov QWORD PTR [rsp+40], r11 mov r10, QWORD PTR [r8+48] mov r11, QWORD PTR [r8+56] and r10, r12 and r11, r12 mov QWORD PTR [rsp+48], r10 mov QWORD PTR [rsp+56], r11 mov r10, QWORD PTR [r8+64] mov r11, QWORD PTR [r8+72] and r10, r12 and r11, r12 mov QWORD PTR [rsp+64], r10 mov QWORD PTR [rsp+72], r11 mov r10, QWORD PTR [r8+80] mov r11, QWORD PTR [r8+88] and r10, r12 and r11, r12 mov QWORD PTR [rsp+80], r10 mov QWORD PTR [rsp+88], r11 mov r10, QWORD PTR [r8+96] mov r11, QWORD PTR [r8+104] and r10, r12 and r11, r12 mov QWORD PTR [rsp+96], r10 mov QWORD PTR [rsp+104], r11 mov r10, QWORD PTR [r8+112] mov r11, QWORD PTR [r8+120] and r10, r12 and r11, r12 mov QWORD PTR [rsp+112], r10 mov QWORD PTR [rsp+120], r11 mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] sub rax, QWORD PTR [rsp] sbb r9, QWORD PTR [rsp+8] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov rax, QWORD PTR [rcx+16] mov r9, QWORD PTR [rcx+24] sbb rax, QWORD PTR [rsp+16] sbb r9, QWORD PTR [rsp+24] mov QWORD PTR [rcx+16], rax mov QWORD PTR [rcx+24], r9 mov rax, QWORD PTR [rcx+32] mov r9, QWORD PTR [rcx+40] sbb rax, QWORD PTR [rsp+32] sbb r9, QWORD PTR [rsp+40] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [rcx+56] sbb rax, QWORD PTR [rsp+48] sbb r9, QWORD PTR [rsp+56] mov QWORD PTR [rcx+48], rax mov QWORD PTR [rcx+56], r9 mov rax, QWORD PTR [rcx+64] mov r9, QWORD PTR [rcx+72] sbb rax, QWORD PTR [rsp+64] sbb r9, QWORD PTR [rsp+72] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov rax, QWORD PTR [rcx+80] mov r9, QWORD PTR [rcx+88] sbb rax, QWORD PTR [rsp+80] sbb r9, QWORD PTR [rsp+88] mov QWORD PTR [rcx+80], rax mov QWORD PTR [rcx+88], r9 mov rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [rcx+104] sbb rax, QWORD PTR [rsp+96] sbb r9, QWORD PTR [rsp+104] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov rax, QWORD PTR [rcx+112] mov r9, QWORD PTR [rcx+120] sbb rax, QWORD PTR [rsp+112] sbb r9, QWORD PTR [rsp+120] 
mov QWORD PTR [rcx+112], rax mov QWORD PTR [rcx+120], r9 add rsp, 128 pop r12 ret sp_1024_mont_dbl_16 ENDP _text ENDS ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of addition. ; * a Number to souble in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA sp_1024_mont_tpl_16 PROC push r12 sub rsp, 128 mov rax, QWORD PTR [rdx] mov r9, QWORD PTR [rdx+8] mov r10, QWORD PTR [rdx+16] mov r11, QWORD PTR [rdx+24] add rax, QWORD PTR [rdx] mov r12, 0 adc r9, QWORD PTR [rdx+8] adc r10, QWORD PTR [rdx+16] adc r11, QWORD PTR [rdx+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rdx+32] mov r9, QWORD PTR [rdx+40] mov r10, QWORD PTR [rdx+48] mov r11, QWORD PTR [rdx+56] adc rax, QWORD PTR [rdx+32] adc r9, QWORD PTR [rdx+40] adc r10, QWORD PTR [rdx+48] adc r11, QWORD PTR [rdx+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rdx+64] mov r9, QWORD PTR [rdx+72] mov r10, QWORD PTR [rdx+80] mov r11, QWORD PTR [rdx+88] adc rax, QWORD PTR [rdx+64] adc r9, QWORD PTR [rdx+72] adc r10, QWORD PTR [rdx+80] adc r11, QWORD PTR [rdx+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [rdx+96] mov r9, QWORD PTR [rdx+104] mov r10, QWORD PTR [rdx+112] mov r11, QWORD PTR [rdx+120] adc rax, QWORD PTR [rdx+96] adc r9, QWORD PTR [rdx+104] adc r10, QWORD PTR [rdx+112] adc r11, QWORD PTR [rdx+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 sbb r12, 0 sub r11, QWORD PTR [r8+120] sbb r11, r11 not r11 or r12, r11 mov r10, QWORD PTR [r8] mov r11, QWORD PTR [r8+8] and r10, r12 and r11, r12 mov QWORD PTR [rsp], r10 mov QWORD PTR [rsp+8], r11 mov r10, QWORD PTR [r8+16] mov r11, QWORD PTR [r8+24] and r10, r12 and r11, r12 mov QWORD PTR [rsp+16], r10 
mov QWORD PTR [rsp+24], r11 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] and r10, r12 and r11, r12 mov QWORD PTR [rsp+32], r10 mov QWORD PTR [rsp+40], r11 mov r10, QWORD PTR [r8+48] mov r11, QWORD PTR [r8+56] and r10, r12 and r11, r12 mov QWORD PTR [rsp+48], r10 mov QWORD PTR [rsp+56], r11 mov r10, QWORD PTR [r8+64] mov r11, QWORD PTR [r8+72] and r10, r12 and r11, r12 mov QWORD PTR [rsp+64], r10 mov QWORD PTR [rsp+72], r11 mov r10, QWORD PTR [r8+80] mov r11, QWORD PTR [r8+88] and r10, r12 and r11, r12 mov QWORD PTR [rsp+80], r10 mov QWORD PTR [rsp+88], r11 mov r10, QWORD PTR [r8+96] mov r11, QWORD PTR [r8+104] and r10, r12 and r11, r12 mov QWORD PTR [rsp+96], r10 mov QWORD PTR [rsp+104], r11 mov r10, QWORD PTR [r8+112] mov r11, QWORD PTR [r8+120] and r10, r12 and r11, r12 mov QWORD PTR [rsp+112], r10 mov QWORD PTR [rsp+120], r11 mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] sub rax, QWORD PTR [rsp] sbb r9, QWORD PTR [rsp+8] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov rax, QWORD PTR [rcx+16] mov r9, QWORD PTR [rcx+24] sbb rax, QWORD PTR [rsp+16] sbb r9, QWORD PTR [rsp+24] mov QWORD PTR [rcx+16], rax mov QWORD PTR [rcx+24], r9 mov rax, QWORD PTR [rcx+32] mov r9, QWORD PTR [rcx+40] sbb rax, QWORD PTR [rsp+32] sbb r9, QWORD PTR [rsp+40] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [rcx+56] sbb rax, QWORD PTR [rsp+48] sbb r9, QWORD PTR [rsp+56] mov QWORD PTR [rcx+48], rax mov QWORD PTR [rcx+56], r9 mov rax, QWORD PTR [rcx+64] mov r9, QWORD PTR [rcx+72] sbb rax, QWORD PTR [rsp+64] sbb r9, QWORD PTR [rsp+72] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov rax, QWORD PTR [rcx+80] mov r9, QWORD PTR [rcx+88] sbb rax, QWORD PTR [rsp+80] sbb r9, QWORD PTR [rsp+88] mov QWORD PTR [rcx+80], rax mov QWORD PTR [rcx+88], r9 mov rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [rcx+104] sbb rax, QWORD PTR [rsp+96] sbb r9, QWORD PTR [rsp+104] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov 
rax, QWORD PTR [rcx+112] mov r9, QWORD PTR [rcx+120] sbb rax, QWORD PTR [rsp+112] sbb r9, QWORD PTR [rsp+120] mov QWORD PTR [rcx+112], rax mov QWORD PTR [rcx+120], r9 mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] mov r10, QWORD PTR [rcx+16] mov r11, QWORD PTR [rcx+24] add rax, QWORD PTR [rdx] mov r12, 0 adc r9, QWORD PTR [rdx+8] adc r10, QWORD PTR [rdx+16] adc r11, QWORD PTR [rdx+24] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [rcx+32] mov r9, QWORD PTR [rcx+40] mov r10, QWORD PTR [rcx+48] mov r11, QWORD PTR [rcx+56] adc rax, QWORD PTR [rdx+32] adc r9, QWORD PTR [rdx+40] adc r10, QWORD PTR [rdx+48] adc r11, QWORD PTR [rdx+56] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [rcx+64] mov r9, QWORD PTR [rcx+72] mov r10, QWORD PTR [rcx+80] mov r11, QWORD PTR [rcx+88] adc rax, QWORD PTR [rdx+64] adc r9, QWORD PTR [rdx+72] adc r10, QWORD PTR [rdx+80] adc r11, QWORD PTR [rdx+88] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [rcx+104] mov r10, QWORD PTR [rcx+112] mov r11, QWORD PTR [rcx+120] adc rax, QWORD PTR [rdx+96] adc r9, QWORD PTR [rdx+104] adc r10, QWORD PTR [rdx+112] adc r11, QWORD PTR [rdx+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 sbb r12, 0 sub r11, QWORD PTR [r8+120] sbb r11, r11 not r11 or r12, r11 mov r10, QWORD PTR [r8] mov r11, QWORD PTR [r8+8] and r10, r12 and r11, r12 mov QWORD PTR [rsp], r10 mov QWORD PTR [rsp+8], r11 mov r10, QWORD PTR [r8+16] mov r11, QWORD PTR [r8+24] and r10, r12 and r11, r12 mov QWORD PTR [rsp+16], r10 mov QWORD PTR [rsp+24], r11 mov r10, QWORD PTR [r8+32] mov r11, QWORD PTR [r8+40] and r10, r12 and r11, r12 mov QWORD PTR [rsp+32], r10 mov QWORD PTR [rsp+40], r11 mov r10, QWORD PTR 
[r8+48] mov r11, QWORD PTR [r8+56] and r10, r12 and r11, r12 mov QWORD PTR [rsp+48], r10 mov QWORD PTR [rsp+56], r11 mov r10, QWORD PTR [r8+64] mov r11, QWORD PTR [r8+72] and r10, r12 and r11, r12 mov QWORD PTR [rsp+64], r10 mov QWORD PTR [rsp+72], r11 mov r10, QWORD PTR [r8+80] mov r11, QWORD PTR [r8+88] and r10, r12 and r11, r12 mov QWORD PTR [rsp+80], r10 mov QWORD PTR [rsp+88], r11 mov r10, QWORD PTR [r8+96] mov r11, QWORD PTR [r8+104] and r10, r12 and r11, r12 mov QWORD PTR [rsp+96], r10 mov QWORD PTR [rsp+104], r11 mov r10, QWORD PTR [r8+112] mov r11, QWORD PTR [r8+120] and r10, r12 and r11, r12 mov QWORD PTR [rsp+112], r10 mov QWORD PTR [rsp+120], r11 mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] sub rax, QWORD PTR [rsp] sbb r9, QWORD PTR [rsp+8] mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r9 mov rax, QWORD PTR [rcx+16] mov r9, QWORD PTR [rcx+24] sbb rax, QWORD PTR [rsp+16] sbb r9, QWORD PTR [rsp+24] mov QWORD PTR [rcx+16], rax mov QWORD PTR [rcx+24], r9 mov rax, QWORD PTR [rcx+32] mov r9, QWORD PTR [rcx+40] sbb rax, QWORD PTR [rsp+32] sbb r9, QWORD PTR [rsp+40] mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r9 mov rax, QWORD PTR [rcx+48] mov r9, QWORD PTR [rcx+56] sbb rax, QWORD PTR [rsp+48] sbb r9, QWORD PTR [rsp+56] mov QWORD PTR [rcx+48], rax mov QWORD PTR [rcx+56], r9 mov rax, QWORD PTR [rcx+64] mov r9, QWORD PTR [rcx+72] sbb rax, QWORD PTR [rsp+64] sbb r9, QWORD PTR [rsp+72] mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r9 mov rax, QWORD PTR [rcx+80] mov r9, QWORD PTR [rcx+88] sbb rax, QWORD PTR [rsp+80] sbb r9, QWORD PTR [rsp+88] mov QWORD PTR [rcx+80], rax mov QWORD PTR [rcx+88], r9 mov rax, QWORD PTR [rcx+96] mov r9, QWORD PTR [rcx+104] sbb rax, QWORD PTR [rsp+96] sbb r9, QWORD PTR [rsp+104] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r9 mov rax, QWORD PTR [rcx+112] mov r9, QWORD PTR [rcx+120] sbb rax, QWORD PTR [rsp+112] sbb r9, QWORD PTR [rsp+120] mov QWORD PTR [rcx+112], rax mov QWORD PTR [rcx+120], r9 add rsp, 128 
        pop r12
        ret
sp_1024_mont_tpl_16 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * Subtracts b from a over 16 64-bit words, then adds the modulus back
; * when that subtraction borrowed. The modulus is ANDed with an
; * all-ones/zero mask into a 128 byte stack buffer, so the same
; * instructions run whether or not the correction is needed.
; *
; * r Result of subtraction (rcx).
; * a Number to subtract from, in Montgomery form (rdx).
; * b Number to subtract, in Montgomery form (r8).
; * m Modulus (prime) (r9).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_16 PROC
        push r12
        push r13
        ; 128 bytes of stack scratch for the masked modulus
        sub rsp, 128
        ; r = a - b, four words at a time, borrow carried in CF
        mov rax, QWORD PTR [rdx]
        mov r10, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [rdx+24]
        sub rax, QWORD PTR [r8]
        ; mov (not xor) so that the borrow in CF is left untouched
        mov r13, 0
        sbb r10, QWORD PTR [r8+8]
        sbb r11, QWORD PTR [r8+16]
        sbb r12, QWORD PTR [r8+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov rax, QWORD PTR [rdx+32]
        mov r10, QWORD PTR [rdx+40]
        mov r11, QWORD PTR [rdx+48]
        mov r12, QWORD PTR [rdx+56]
        sbb rax, QWORD PTR [r8+32]
        sbb r10, QWORD PTR [r8+40]
        sbb r11, QWORD PTR [r8+48]
        sbb r12, QWORD PTR [r8+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov QWORD PTR [rcx+48], r11
        mov QWORD PTR [rcx+56], r12
        mov rax, QWORD PTR [rdx+64]
        mov r10, QWORD PTR [rdx+72]
        mov r11, QWORD PTR [rdx+80]
        mov r12, QWORD PTR [rdx+88]
        sbb rax, QWORD PTR [r8+64]
        sbb r10, QWORD PTR [r8+72]
        sbb r11, QWORD PTR [r8+80]
        sbb r12, QWORD PTR [r8+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r10
        mov QWORD PTR [rcx+80], r11
        mov QWORD PTR [rcx+88], r12
        mov rax, QWORD PTR [rdx+96]
        mov r10, QWORD PTR [rdx+104]
        mov r11, QWORD PTR [rdx+112]
        mov r12, QWORD PTR [rdx+120]
        sbb rax, QWORD PTR [r8+96]
        sbb r10, QWORD PTR [r8+104]
        sbb r11, QWORD PTR [r8+112]
        sbb r12, QWORD PTR [r8+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r10
        mov QWORD PTR [rcx+112], r11
        mov QWORD PTR [rcx+120], r12
        ; r13 = 0 - CF: all ones when a - b borrowed, otherwise zero
        sbb r13, 0
        ; stack[0..15] = m[0..15] & mask
        mov r11, QWORD PTR [r9]
        mov r12, QWORD PTR [r9+8]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp], r11
        mov QWORD PTR [rsp+8], r12
        mov r11, QWORD PTR [r9+16]
        mov r12, QWORD PTR [r9+24]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+16], r11
        mov QWORD PTR [rsp+24], r12
        mov r11, QWORD PTR [r9+32]
        mov r12, QWORD PTR [r9+40]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+32], r11
        mov QWORD PTR [rsp+40], r12
        mov r11, QWORD PTR [r9+48]
        mov r12, QWORD PTR [r9+56]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+48], r11
        mov QWORD PTR [rsp+56], r12
        mov r11, QWORD PTR [r9+64]
        mov r12, QWORD PTR [r9+72]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+64], r11
        mov QWORD PTR [rsp+72], r12
        mov r11, QWORD PTR [r9+80]
        mov r12, QWORD PTR [r9+88]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+80], r11
        mov QWORD PTR [rsp+88], r12
        mov r11, QWORD PTR [r9+96]
        mov r12, QWORD PTR [r9+104]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+96], r11
        mov QWORD PTR [rsp+104], r12
        mov r11, QWORD PTR [r9+112]
        mov r12, QWORD PTR [r9+120]
        and r11, r13
        and r12, r13
        mov QWORD PTR [rsp+112], r11
        mov QWORD PTR [rsp+120], r12
        ; r += masked modulus; the carry out of the top word is dropped
        mov rax, QWORD PTR [rcx]
        mov r10, QWORD PTR [rcx+8]
        add rax, QWORD PTR [rsp]
        adc r10, QWORD PTR [rsp+8]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov rax, QWORD PTR [rcx+16]
        mov r10, QWORD PTR [rcx+24]
        adc rax, QWORD PTR [rsp+16]
        adc r10, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        mov rax, QWORD PTR [rcx+32]
        mov r10, QWORD PTR [rcx+40]
        adc rax, QWORD PTR [rsp+32]
        adc r10, QWORD PTR [rsp+40]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov rax, QWORD PTR [rcx+48]
        mov r10, QWORD PTR [rcx+56]
        adc rax, QWORD PTR [rsp+48]
        adc r10, QWORD PTR [rsp+56]
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        mov rax, QWORD PTR [rcx+64]
        mov r10, QWORD PTR [rcx+72]
        adc rax, QWORD PTR [rsp+64]
        adc r10, QWORD PTR [rsp+72]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r10
        mov rax, QWORD PTR [rcx+80]
        mov r10, QWORD PTR [rcx+88]
        adc rax, QWORD PTR [rsp+80]
        adc r10, QWORD PTR [rsp+88]
        mov QWORD PTR [rcx+80], rax
        mov QWORD PTR [rcx+88], r10
        mov rax, QWORD PTR [rcx+96]
        mov r10, QWORD PTR [rcx+104]
        adc rax, QWORD PTR [rsp+96]
        adc r10, QWORD PTR [rsp+104]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r10
        mov rax, QWORD PTR [rcx+112]
        mov r10, QWORD PTR [rcx+120]
        adc rax, QWORD PTR [rsp+112]
        adc r10, QWORD PTR [rsp+120]
        mov QWORD PTR [rcx+112], rax
        mov QWORD PTR [rcx+120], r10
        add rsp, 128
        pop r13
        pop r12
        ret
sp_1024_mont_sub_16 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd the modulus is added first (selected with a mask built
; * from the low bit of a), then the 16 word sum plus its carry bit is
; * shifted right by one bit into r.
; *
; * r Result of division by 2 (rcx).
; * a Number to divide (rdx).
; * m Modulus (prime) (r8).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_div2_16 PROC
        push r12
        push r13
        ; 128 bytes of stack scratch for the intermediate sum
        sub rsp, 128
        mov r13, QWORD PTR [rdx]
        ; r12 collects the carry out of the addition below
        xor r12, r12
        ; rax keeps a[0] for the first add
        mov rax, r13
        ; r13 = 0 - (a[0] & 1): all ones when a is odd, otherwise zero
        and r13, 1
        neg r13
        ; stack[0..15] = m[0..15] & mask
        mov r10, QWORD PTR [r8]
        and r10, r13
        mov QWORD PTR [rsp], r10
        mov r10, QWORD PTR [r8+8]
        and r10, r13
        mov QWORD PTR [rsp+8], r10
        mov r10, QWORD PTR [r8+16]
        and r10, r13
        mov QWORD PTR [rsp+16], r10
        mov r10, QWORD PTR [r8+24]
        and r10, r13
        mov QWORD PTR [rsp+24], r10
        mov r10, QWORD PTR [r8+32]
        and r10, r13
        mov QWORD PTR [rsp+32], r10
        mov r10, QWORD PTR [r8+40]
        and r10, r13
        mov QWORD PTR [rsp+40], r10
        mov r10, QWORD PTR [r8+48]
        and r10, r13
        mov QWORD PTR [rsp+48], r10
        mov r10, QWORD PTR [r8+56]
        and r10, r13
        mov QWORD PTR [rsp+56], r10
        mov r10, QWORD PTR [r8+64]
        and r10, r13
        mov QWORD PTR [rsp+64], r10
        mov r10, QWORD PTR [r8+72]
        and r10, r13
        mov QWORD PTR [rsp+72], r10
        mov r10, QWORD PTR [r8+80]
        and r10, r13
        mov QWORD PTR [rsp+80], r10
        mov r10, QWORD PTR [r8+88]
        and r10, r13
        mov QWORD PTR [rsp+88], r10
        mov r10, QWORD PTR [r8+96]
        and r10, r13
        mov QWORD PTR [rsp+96], r10
        mov r10, QWORD PTR [r8+104]
        and r10, r13
        mov QWORD PTR [rsp+104], r10
        mov r10, QWORD PTR [r8+112]
        and r10, r13
        mov QWORD PTR [rsp+112], r10
        mov r10, QWORD PTR [r8+120]
        and r10, r13
        mov QWORD PTR [rsp+120], r10
        ; stack += a, carry chained through CF
        add QWORD PTR [rsp], rax
        mov rax, QWORD PTR [rdx+8]
        adc QWORD PTR [rsp+8], rax
        mov rax, QWORD PTR [rdx+16]
        adc QWORD PTR [rsp+16], rax
        mov rax, QWORD PTR [rdx+24]
        adc QWORD PTR [rsp+24], rax
        mov rax, QWORD PTR [rdx+32]
        adc QWORD PTR [rsp+32], rax
        mov rax, QWORD PTR [rdx+40]
        adc QWORD PTR [rsp+40], rax
        mov rax, QWORD PTR [rdx+48]
        adc QWORD PTR [rsp+48], rax
        mov rax, QWORD PTR [rdx+56]
        adc QWORD PTR [rsp+56], rax
        mov rax, QWORD PTR [rdx+64]
        adc QWORD PTR [rsp+64], rax
        mov rax, QWORD PTR [rdx+72]
        adc QWORD PTR [rsp+72], rax
        mov rax, QWORD PTR [rdx+80]
        adc QWORD PTR [rsp+80], rax
        mov rax, QWORD PTR [rdx+88]
        adc QWORD PTR [rsp+88], rax
        mov rax, QWORD PTR [rdx+96]
        adc QWORD PTR [rsp+96], rax
        mov rax, QWORD PTR [rdx+104]
        adc QWORD PTR [rsp+104], rax
        mov rax, QWORD PTR [rdx+112]
        adc QWORD PTR [rsp+112], rax
        mov rax, QWORD PTR [rdx+120]
        adc QWORD PTR [rsp+120], rax
        ; r12 = carry out of the top word
        adc r12, 0
        ; r = (r12:stack) >> 1; each shrd pulls in the low bit of the
        ; next higher word, rax and r9 alternate as current/next word
        mov rax, QWORD PTR [rsp]
        mov r9, QWORD PTR [rsp+8]
        shrd rax, r9, 1
        mov QWORD PTR [rcx], rax
        mov rax, QWORD PTR [rsp+16]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+8], r9
        mov r9, QWORD PTR [rsp+24]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+16], rax
        mov rax, QWORD PTR [rsp+32]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+24], r9
        mov r9, QWORD PTR [rsp+40]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+32], rax
        mov rax, QWORD PTR [rsp+48]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+40], r9
        mov r9, QWORD PTR [rsp+56]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+48], rax
        mov rax, QWORD PTR [rsp+64]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+56], r9
        mov r9, QWORD PTR [rsp+72]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+64], rax
        mov rax, QWORD PTR [rsp+80]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+72], r9
        mov r9, QWORD PTR [rsp+88]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+80], rax
        mov rax, QWORD PTR [rsp+96]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+88], r9
        mov r9, QWORD PTR [rsp+104]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+96], rax
        mov rax, QWORD PTR [rsp+112]
        shrd r9, rax, 1
        mov QWORD PTR [rcx+104], r9
        mov r9, QWORD PTR [rsp+120]
        shrd rax, r9, 1
        mov QWORD PTR [rcx+112], rax
        ; the top word takes the saved carry as its high bit
        shrd r9, r12, 1
        mov QWORD PTR [rcx+120], r9
        add rsp, 128
        pop r13
        pop r12
        ret
sp_1024_mont_div2_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
; Arguments as used by the code below: rcx = a, rdx = m, r8 = mp.
; Uses mulx and pext together with the adcx/adox dual carry chains.
; The loop body is unrolled twice, so each pass handles two words of a.
; r14, r15, rdi and rsi hold the four lowest live words of a; the
; remaining words are read and written through r9, which is biased by
; 64 bytes. rbp carries the overflow from one word step to the next.
; After the loop the modulus is subtracted under a mask that is set
; when the final carry is set or the top result word is >= m[15].
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_avx2_16 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        push rbp
        ; r9 = a, r10 = m (rdx and rcx are needed by mulx below)
        mov r9, rcx
        mov r10, rdx
        xor rbp, rbp
        ; i = 16
        mov r11, 16
        mov r14, QWORD PTR [r9]
        mov r15, QWORD PTR [r9+8]
        mov rdi, QWORD PTR [r9+16]
        mov rsi, QWORD PTR [r9+24]
        add r9, 64
        ; NOTE: rbp is already zero from the xor above; nothing in
        ; between writes it, so this second clear is redundant.
        xor rbp, rbp
L_1024_mont_reduce_avx2_16_loop:
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        ; zero rbx and clear both CF and OF for the adcx/adox chains
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9+-32]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+-24]
        adcx rsi, rax
        adox r13, rcx
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+-16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-24], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9+-8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-16], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-8], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+8]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9], r12
        ; a[i+9] += m[9] * mu
        mulx rcx, rax, QWORD PTR [r10+72]
        mov r12, QWORD PTR [r9+16]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+8], r13
        ; a[i+10] += m[10] * mu
        mulx rcx, rax, QWORD PTR [r10+80]
        mov r13, QWORD PTR [r9+24]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+16], r12
        ; a[i+11] += m[11] * mu
        mulx rcx, rax, QWORD PTR [r10+88]
        mov r12, QWORD PTR [r9+32]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+24], r13
        ; a[i+12] += m[12] * mu
        mulx rcx, rax, QWORD PTR [r10+96]
        mov r13, QWORD PTR [r9+40]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+32], r12
        ; a[i+13] += m[13] * mu
        mulx rcx, rax, QWORD PTR [r10+104]
        mov r12, QWORD PTR [r9+48]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+40], r13
        ; a[i+14] += m[14] * mu
        mulx rcx, rax, QWORD PTR [r10+112]
        mov r13, QWORD PTR [r9+56]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+48], r12
        ; a[i+15] += m[15] * mu
        mulx rcx, rax, QWORD PTR [r10+120]
        mov r12, QWORD PTR [r9+64]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+56], r13
        ; fold in the carry from the previous step, then collect the
        ; new carry (OF chain plus CF chain) into rbp; rbx is zero
        adcx r12, rbp
        mov rbp, rbx
        mov QWORD PTR [r9+64], r12
        adox rbp, rbx
        adcx rbp, rbx
        ; second unrolled step: same work one word further on, so all
        ; r9 displacements are 8 larger than in the first step
        ; mu = a[i] * mp
        mov rdx, r14
        mov r12, r14
        imul rdx, r8
        xor rbx, rbx
        ; a[i+0] += m[0] * mu
        mulx rcx, rax, QWORD PTR [r10]
        mov r14, r15
        adcx r12, rax
        adox r14, rcx
        ; a[i+1] += m[1] * mu
        mulx rcx, rax, QWORD PTR [r10+8]
        mov r15, rdi
        adcx r14, rax
        adox r15, rcx
        ; a[i+2] += m[2] * mu
        mulx rcx, rax, QWORD PTR [r10+16]
        mov rdi, rsi
        adcx r15, rax
        adox rdi, rcx
        ; a[i+3] += m[3] * mu
        mulx rcx, rax, QWORD PTR [r10+24]
        mov rsi, QWORD PTR [r9+-24]
        adcx rdi, rax
        adox rsi, rcx
        ; a[i+4] += m[4] * mu
        mulx rcx, rax, QWORD PTR [r10+32]
        mov r13, QWORD PTR [r9+-16]
        adcx rsi, rax
        adox r13, rcx
        ; a[i+5] += m[5] * mu
        mulx rcx, rax, QWORD PTR [r10+40]
        mov r12, QWORD PTR [r9+-8]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+-16], r13
        ; a[i+6] += m[6] * mu
        mulx rcx, rax, QWORD PTR [r10+48]
        mov r13, QWORD PTR [r9]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+-8], r12
        ; a[i+7] += m[7] * mu
        mulx rcx, rax, QWORD PTR [r10+56]
        mov r12, QWORD PTR [r9+8]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9], r13
        ; a[i+8] += m[8] * mu
        mulx rcx, rax, QWORD PTR [r10+64]
        mov r13, QWORD PTR [r9+16]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+8], r12
        ; a[i+9] += m[9] * mu
        mulx rcx, rax, QWORD PTR [r10+72]
        mov r12, QWORD PTR [r9+24]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+16], r13
        ; a[i+10] += m[10] * mu
        mulx rcx, rax, QWORD PTR [r10+80]
        mov r13, QWORD PTR [r9+32]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+24], r12
        ; a[i+11] += m[11] * mu
        mulx rcx, rax, QWORD PTR [r10+88]
        mov r12, QWORD PTR [r9+40]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+32], r13
        ; a[i+12] += m[12] * mu
        mulx rcx, rax, QWORD PTR [r10+96]
        mov r13, QWORD PTR [r9+48]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+40], r12
        ; a[i+13] += m[13] * mu
        mulx rcx, rax, QWORD PTR [r10+104]
        mov r12, QWORD PTR [r9+56]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+48], r13
        ; a[i+14] += m[14] * mu
        mulx rcx, rax, QWORD PTR [r10+112]
        mov r13, QWORD PTR [r9+64]
        adcx r12, rax
        adox r13, rcx
        mov QWORD PTR [r9+56], r12
        ; a[i+15] += m[15] * mu
        mulx rcx, rax, QWORD PTR [r10+120]
        mov r12, QWORD PTR [r9+72]
        adcx r13, rax
        adox r12, rcx
        mov QWORD PTR [r9+64], r13
        adcx r12, rbp
        mov rbp, rbx
        mov QWORD PTR [r9+72], r12
        adox rbp, rbx
        adcx rbp, rbx
        ; a += 2
        add r9, 16
        ; i -= 2
        sub r11, 2
        jnz L_1024_mont_reduce_avx2_16_loop
        ; undo the 64 byte bias: r9 = a + 128 bytes (upper half of a)
        sub r9, 64
        ; r12 still holds the top word just stored; compare with m[15]
        sub r12, QWORD PTR [r10+120]
        ; r8 = source pointer for the upper half (mp no longer needed)
        mov r8, r9
        ; r12 = all ones when the top word was below m[15]
        sbb r12, r12
        ; rbp = 0 - carry: all ones when the final carry was set
        neg rbp
        not r12
        ; rbp = mask: carry set, or top word >= m[15]
        or rbp, r12
        ; r9 = a: the result is written to the lower half
        sub r9, 128
        ; a[0..15] = upper half - (m & mask); pext with an all-ones
        ; mask yields the m word, with a zero mask yields 0. The first
        ; four upper words come from r14, r15, rdi, rsi. rcx, rdx and
        ; rax rotate roles so each store lags one step behind its sbb.
        mov rcx, QWORD PTR [r10]
        mov rdx, r14
        pext rcx, rcx, rbp
        sub rdx, rcx
        mov rcx, QWORD PTR [r10+8]
        mov rax, r15
        pext rcx, rcx, rbp
        mov QWORD PTR [r9], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+16]
        mov rcx, rdi
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+8], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+24]
        mov rdx, rsi
        pext rax, rax, rbp
        mov QWORD PTR [r9+16], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+32]
        mov rax, QWORD PTR [r8+32]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+24], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+40]
        mov rcx, QWORD PTR [r8+40]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+32], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+48]
        mov rdx, QWORD PTR [r8+48]
        pext rax, rax, rbp
        mov QWORD PTR [r9+40], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+56]
        mov rax, QWORD PTR [r8+56]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+48], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+64]
        mov rcx, QWORD PTR [r8+64]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+56], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+72]
        mov rdx, QWORD PTR [r8+72]
        pext rax, rax, rbp
        mov QWORD PTR [r9+64], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+80]
        mov rax, QWORD PTR [r8+80]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+72], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+88]
        mov rcx, QWORD PTR [r8+88]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+80], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+96]
        mov rdx, QWORD PTR [r8+96]
        pext rax, rax, rbp
        mov QWORD PTR [r9+88], rcx
        sbb rdx, rax
        mov rcx, QWORD PTR [r10+104]
        mov rax, QWORD PTR [r8+104]
        pext rcx, rcx, rbp
        mov QWORD PTR [r9+96], rdx
        sbb rax, rcx
        mov rdx, QWORD PTR [r10+112]
        mov rcx, QWORD PTR [r8+112]
        pext rdx, rdx, rbp
        mov QWORD PTR [r9+104], rax
        sbb rcx, rdx
        mov rax, QWORD PTR [r10+120]
        mov rdx, QWORD PTR [r8+120]
        pext rax, rax, rbp
        mov QWORD PTR [r9+112], rcx
        sbb rdx, rax
        mov QWORD PTR [r9+120], rdx
        pop rbp
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_1024_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; */
; Arguments as used by the code below: rcx = r, rdx = a, r8 = b, r9 = m.
; Adds a and b over 16 64-bit words, then subtracts the modulus under a
; mask that is set when the addition carried out or the top sum word is
; >= m[15]. The masked modulus words are produced with pext, so no
; stack scratch is used.
_text SEGMENT READONLY PARA
sp_1024_mont_add_avx2_16 PROC
        push r12
        push r13
        ; r = a + b, four words at a time, carry chained through CF
        mov rax, QWORD PTR [rdx]
        mov r10, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [rdx+24]
        add rax, QWORD PTR [r8]
        ; mov (not xor) so that the carry in CF is left untouched
        mov r13, 0
        adc r10, QWORD PTR [r8+8]
        adc r11, QWORD PTR [r8+16]
        adc r12, QWORD PTR [r8+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov rax, QWORD PTR [rdx+32]
        mov r10, QWORD PTR [rdx+40]
        mov r11, QWORD PTR [rdx+48]
        mov r12, QWORD PTR [rdx+56]
        adc rax, QWORD PTR [r8+32]
        adc r10, QWORD PTR [r8+40]
        adc r11, QWORD PTR [r8+48]
        adc r12, QWORD PTR [r8+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov QWORD PTR [rcx+48], r11
        mov QWORD PTR [rcx+56], r12
        mov rax, QWORD PTR [rdx+64]
        mov r10, QWORD PTR [rdx+72]
        mov r11, QWORD PTR [rdx+80]
        mov r12, QWORD PTR [rdx+88]
        adc rax, QWORD PTR [r8+64]
        adc r10, QWORD PTR [r8+72]
        adc r11, QWORD PTR [r8+80]
        adc r12, QWORD PTR [r8+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r10
        mov QWORD PTR [rcx+80], r11
        mov QWORD PTR [rcx+88], r12
        mov rax, QWORD PTR [rdx+96]
        mov r10, QWORD PTR [rdx+104]
        mov r11, QWORD PTR [rdx+112]
        mov r12, QWORD PTR [rdx+120]
        adc rax, QWORD PTR [r8+96]
        adc r10, QWORD PTR [r8+104]
        adc r11, QWORD PTR [r8+112]
        adc r12, QWORD PTR [r8+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r10
        mov QWORD PTR [rcx+112], r11
        mov QWORD PTR [rcx+120], r12
        ; r13 = 0 - CF: all ones when the addition carried out
        sbb r13, 0
        ; r12 still holds the top sum word; compare it with m[15]
        sub r12, QWORD PTR [r9+120]
        ; r12 = all ones when top word >= m[15] (after the not)
        sbb r12, r12
        not r12
        ; r13 = mask selecting whether the modulus is subtracted
        or r13, r12
        ; r -= m & mask; pext with an all-ones mask yields the m word,
        ; with a zero mask yields 0. Borrow chained through CF.
        mov r11, QWORD PTR [r9]
        mov r12, QWORD PTR [r9+8]
        mov rax, QWORD PTR [rcx]
        mov r10, QWORD PTR [rcx+8]
        pext r11, r11, r13
        pext r12, r12, r13
        sub rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov r11, QWORD PTR [r9+16]
        mov r12, QWORD PTR [r9+24]
        mov rax, QWORD PTR [rcx+16]
        mov r10, QWORD PTR [rcx+24]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r10
        mov r11, QWORD PTR [r9+32]
        mov r12, QWORD PTR [r9+40]
        mov rax, QWORD PTR [rcx+32]
        mov r10, QWORD PTR [rcx+40]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov r11, QWORD PTR [r9+48]
        mov r12, QWORD PTR [r9+56]
        mov rax, QWORD PTR [rcx+48]
        mov r10, QWORD PTR [rcx+56]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r10
        mov r11, QWORD PTR [r9+64]
        mov r12, QWORD PTR [r9+72]
        mov rax, QWORD PTR [rcx+64]
        mov r10, QWORD PTR [rcx+72]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r10
        mov r11, QWORD PTR [r9+80]
        mov r12, QWORD PTR [r9+88]
        mov rax, QWORD PTR [rcx+80]
        mov r10, QWORD PTR [rcx+88]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+80], rax
        mov QWORD PTR [rcx+88], r10
        mov r11, QWORD PTR [r9+96]
        mov r12, QWORD PTR [r9+104]
        mov rax, QWORD PTR [rcx+96]
        mov r10, QWORD PTR [rcx+104]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r10
        mov r11, QWORD PTR [r9+112]
        mov r12, QWORD PTR [r9+120]
        mov rax, QWORD PTR [rcx+112]
        mov r10, QWORD PTR [rcx+120]
        pext r11, r11, r13
        pext r12, r12, r13
        sbb rax, r11
        sbb r10, r12
        mov QWORD PTR [rcx+112], rax
        mov QWORD PTR [rcx+120], r10
        pop r13
        pop r12
        ret
sp_1024_mont_add_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of addition.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
; Arguments as used by the code below: rcx = r, rdx = a, r8 = m.
; Adds a to itself over 16 64-bit words, then subtracts the modulus
; under a mask that is set when the addition carried out or the top
; sum word is >= m[15]. The masked modulus words are produced with pext.
_text SEGMENT READONLY PARA
sp_1024_mont_dbl_avx2_16 PROC
        push r12
        ; r = a + a, four words at a time, carry chained through CF
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        add rax, QWORD PTR [rdx]
        ; mov (not xor) so that the carry in CF is left untouched
        mov r12, 0
        adc r9, QWORD PTR [rdx+8]
        adc r10, QWORD PTR [rdx+16]
        adc r11, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [rdx+48]
        mov r11, QWORD PTR [rdx+56]
        adc rax, QWORD PTR [rdx+32]
        adc r9, QWORD PTR [rdx+40]
        adc r10, QWORD PTR [rdx+48]
        adc r11, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        mov rax, QWORD PTR [rdx+64]
        mov r9, QWORD PTR [rdx+72]
        mov r10, QWORD PTR [rdx+80]
        mov r11, QWORD PTR [rdx+88]
        adc rax, QWORD PTR [rdx+64]
        adc r9, QWORD PTR [rdx+72]
        adc r10, QWORD PTR [rdx+80]
        adc r11, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov QWORD PTR [rcx+80], r10
        mov QWORD PTR [rcx+88], r11
        mov rax, QWORD PTR [rdx+96]
        mov r9, QWORD PTR [rdx+104]
        mov r10, QWORD PTR [rdx+112]
        mov r11, QWORD PTR [rdx+120]
        adc rax, QWORD PTR [rdx+96]
        adc r9, QWORD PTR [rdx+104]
        adc r10, QWORD PTR [rdx+112]
        adc r11, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov QWORD PTR [rcx+112], r10
        mov QWORD PTR [rcx+120], r11
        ; r12 = 0 - CF: all ones when the addition carried out
        sbb r12, 0
        ; r11 still holds the top sum word; compare it with m[15]
        sub r11, QWORD PTR [r8+120]
        ; r11 = all ones when top word >= m[15] (after the not)
        sbb r11, r11
        not r11
        ; r12 = mask selecting whether the modulus is subtracted
        or r12, r11
        ; r -= m & mask; pext with an all-ones mask yields the m word,
        ; with a zero mask yields 0. Borrow chained through CF.
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        pext r10, r10, r12
        pext r11, r11, r12
        sub rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        mov rax, QWORD PTR [rcx+16]
        mov r9, QWORD PTR [rcx+24]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r9
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        mov rax, QWORD PTR [rcx+32]
        mov r9, QWORD PTR [rcx+40]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        mov rax, QWORD PTR [rcx+48]
        mov r9, QWORD PTR [rcx+56]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r9
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        mov rax, QWORD PTR [rcx+64]
        mov r9, QWORD PTR [rcx+72]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        mov rax, QWORD PTR [rcx+80]
        mov r9, QWORD PTR [rcx+88]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+80], rax
        mov QWORD PTR [rcx+88], r9
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        mov rax, QWORD PTR [rcx+96]
        mov r9, QWORD PTR [rcx+104]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        mov rax, QWORD PTR [rcx+112]
        mov r9, QWORD PTR [rcx+120]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+112], rax
        mov QWORD PTR [rcx+120], r9
        pop r12
        ret
sp_1024_mont_dbl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of addition.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
; Arguments as used by the code below: rcx = r, rdx = a, r8 = m.
; Two passes: first r = a + a with a masked subtraction of the modulus,
; then r = r + a with a second masked subtraction. Each mask is set
; when the addition carried out or the top sum word is >= m[15].
; NOTE: the second pass reads a again after r has been written, so if
; r and a were the same buffer the second pass would add the doubled
; value rather than the original a.
_text SEGMENT READONLY PARA
sp_1024_mont_tpl_avx2_16 PROC
        push r12
        ; pass 1: r = a + a, carry chained through CF
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        add rax, QWORD PTR [rdx]
        ; mov (not xor) so that the carry in CF is left untouched
        mov r12, 0
        adc r9, QWORD PTR [rdx+8]
        adc r10, QWORD PTR [rdx+16]
        adc r11, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [rdx+32]
        mov r9, QWORD PTR [rdx+40]
        mov r10, QWORD PTR [rdx+48]
        mov r11, QWORD PTR [rdx+56]
        adc rax, QWORD PTR [rdx+32]
        adc r9, QWORD PTR [rdx+40]
        adc r10, QWORD PTR [rdx+48]
        adc r11, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        mov rax, QWORD PTR [rdx+64]
        mov r9, QWORD PTR [rdx+72]
        mov r10, QWORD PTR [rdx+80]
        mov r11, QWORD PTR [rdx+88]
        adc rax, QWORD PTR [rdx+64]
        adc r9, QWORD PTR [rdx+72]
        adc r10, QWORD PTR [rdx+80]
        adc r11, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov QWORD PTR [rcx+80], r10
        mov QWORD PTR [rcx+88], r11
        mov rax, QWORD PTR [rdx+96]
        mov r9, QWORD PTR [rdx+104]
        mov r10, QWORD PTR [rdx+112]
        mov r11, QWORD PTR [rdx+120]
        adc rax, QWORD PTR [rdx+96]
        adc r9, QWORD PTR [rdx+104]
        adc r10, QWORD PTR [rdx+112]
        adc r11, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov QWORD PTR [rcx+112], r10
        mov QWORD PTR [rcx+120], r11
        ; r12 = 0 - CF: all ones when the addition carried out
        sbb r12, 0
        ; r11 still holds the top sum word; compare it with m[15]
        sub r11, QWORD PTR [r8+120]
        sbb r11, r11
        not r11
        ; r12 = mask selecting whether the modulus is subtracted
        or r12, r11
        ; r -= m & mask (pext yields the m word or 0)
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        pext r10, r10, r12
        pext r11, r11, r12
        sub rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        mov rax, QWORD PTR [rcx+16]
        mov r9, QWORD PTR [rcx+24]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r9
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        mov rax, QWORD PTR [rcx+32]
        mov r9, QWORD PTR [rcx+40]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        mov rax, QWORD PTR [rcx+48]
        mov r9, QWORD PTR [rcx+56]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r9
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        mov rax, QWORD PTR [rcx+64]
        mov r9, QWORD PTR [rcx+72]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        mov rax, QWORD PTR [rcx+80]
        mov r9, QWORD PTR [rcx+88]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+80], rax
        mov QWORD PTR [rcx+88], r9
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        mov rax, QWORD PTR [rcx+96]
        mov r9, QWORD PTR [rcx+104]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        mov rax, QWORD PTR [rcx+112]
        mov r9, QWORD PTR [rcx+120]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+112], rax
        mov QWORD PTR [rcx+120], r9
        ; pass 2: r = r + a, carry chained through CF
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        mov r10, QWORD PTR [rcx+16]
        mov r11, QWORD PTR [rcx+24]
        add rax, QWORD PTR [rdx]
        ; mov (not xor) so that the carry in CF is left untouched
        mov r12, 0
        adc r9, QWORD PTR [rdx+8]
        adc r10, QWORD PTR [rdx+16]
        adc r11, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        mov rax, QWORD PTR [rcx+32]
        mov r9, QWORD PTR [rcx+40]
        mov r10, QWORD PTR [rcx+48]
        mov r11, QWORD PTR [rcx+56]
        adc rax, QWORD PTR [rdx+32]
        adc r9, QWORD PTR [rdx+40]
        adc r10, QWORD PTR [rdx+48]
        adc r11, QWORD PTR [rdx+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov QWORD PTR [rcx+48], r10
        mov QWORD PTR [rcx+56], r11
        mov rax, QWORD PTR [rcx+64]
        mov r9, QWORD PTR [rcx+72]
        mov r10, QWORD PTR [rcx+80]
        mov r11, QWORD PTR [rcx+88]
        adc rax, QWORD PTR [rdx+64]
        adc r9, QWORD PTR [rdx+72]
        adc r10, QWORD PTR [rdx+80]
        adc r11, QWORD PTR [rdx+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov QWORD PTR [rcx+80], r10
        mov QWORD PTR [rcx+88], r11
        mov rax, QWORD PTR [rcx+96]
        mov r9, QWORD PTR [rcx+104]
        mov r10, QWORD PTR [rcx+112]
        mov r11, QWORD PTR [rcx+120]
        adc rax, QWORD PTR [rdx+96]
        adc r9, QWORD PTR [rdx+104]
        adc r10, QWORD PTR [rdx+112]
        adc r11, QWORD PTR [rdx+120]
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov QWORD PTR [rcx+112], r10
        mov QWORD PTR [rcx+120], r11
        ; rebuild the mask for the second conditional subtraction
        sbb r12, 0
        sub r11, QWORD PTR [r8+120]
        sbb r11, r11
        not r11
        or r12, r11
        ; r -= m & mask (pext yields the m word or 0)
        mov r10, QWORD PTR [r8]
        mov r11, QWORD PTR [r8+8]
        mov rax, QWORD PTR [rcx]
        mov r9, QWORD PTR [rcx+8]
        pext r10, r10, r12
        pext r11, r11, r12
        sub rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov r10, QWORD PTR [r8+16]
        mov r11, QWORD PTR [r8+24]
        mov rax, QWORD PTR [rcx+16]
        mov r9, QWORD PTR [rcx+24]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+16], rax
        mov QWORD PTR [rcx+24], r9
        mov r10, QWORD PTR [r8+32]
        mov r11, QWORD PTR [r8+40]
        mov rax, QWORD PTR [rcx+32]
        mov r9, QWORD PTR [rcx+40]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r9
        mov r10, QWORD PTR [r8+48]
        mov r11, QWORD PTR [r8+56]
        mov rax, QWORD PTR [rcx+48]
        mov r9, QWORD PTR [rcx+56]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+48], rax
        mov QWORD PTR [rcx+56], r9
        mov r10, QWORD PTR [r8+64]
        mov r11, QWORD PTR [r8+72]
        mov rax, QWORD PTR [rcx+64]
        mov r9, QWORD PTR [rcx+72]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r9
        mov r10, QWORD PTR [r8+80]
        mov r11, QWORD PTR [r8+88]
        mov rax, QWORD PTR [rcx+80]
        mov r9, QWORD PTR [rcx+88]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+80], rax
        mov QWORD PTR [rcx+88], r9
        mov r10, QWORD PTR [r8+96]
        mov r11, QWORD PTR [r8+104]
        mov rax, QWORD PTR [rcx+96]
        mov r9, QWORD PTR [rcx+104]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+96], rax
        mov QWORD PTR [rcx+104], r9
        mov r10, QWORD PTR [r8+112]
        mov r11, QWORD PTR [r8+120]
        mov rax, QWORD PTR [rcx+112]
        mov r9, QWORD PTR [rcx+120]
        pext r10, r10, r12
        pext r11, r11, r12
        sbb rax, r10
        sbb r9, r11
        mov QWORD PTR [rcx+112], rax
        mov QWORD PTR [rcx+120], r9
        pop r12
        ret
sp_1024_mont_tpl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_avx2_16 PROC
        push r12
        push r13
        mov rax, QWORD PTR [rdx]
        mov r10, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [rdx+24]
        sub rax, QWORD PTR [r8]
        mov r13, 0
        sbb r10, QWORD PTR [r8+8]
        sbb r11, QWORD PTR [r8+16]
        sbb r12, QWORD PTR [r8+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r10
        mov QWORD PTR [rcx+16], r11
        mov QWORD PTR [rcx+24], r12
        mov rax, QWORD PTR [rdx+32]
        mov r10, QWORD PTR [rdx+40]
        mov r11, QWORD PTR [rdx+48]
        mov r12, QWORD PTR [rdx+56]
        sbb rax, QWORD PTR [r8+32]
        sbb r10, QWORD PTR [r8+40]
        sbb r11, QWORD PTR [r8+48]
        sbb r12, QWORD PTR [r8+56]
        mov QWORD PTR [rcx+32], rax
        mov QWORD PTR [rcx+40], r10
        mov QWORD PTR [rcx+48], r11
        mov QWORD PTR [rcx+56], r12
        mov rax, QWORD PTR [rdx+64]
        mov r10, QWORD PTR [rdx+72]
        mov r11, QWORD PTR [rdx+80]
        mov r12, QWORD PTR [rdx+88]
        sbb rax, QWORD PTR [r8+64]
        sbb r10, QWORD PTR [r8+72]
        sbb r11, QWORD PTR [r8+80]
        sbb r12, QWORD PTR [r8+88]
        mov QWORD PTR [rcx+64], rax
        mov QWORD PTR [rcx+72], r10
        mov QWORD PTR [rcx+80], r11
        mov QWORD PTR [rcx+88], r12
        mov rax, QWORD PTR [rdx+96]
        mov r10, QWORD PTR [rdx+104]
        mov r11, QWORD PTR [rdx+112]
        mov r12, QWORD PTR
[rdx+120] sbb rax, QWORD PTR [r8+96] sbb r10, QWORD PTR [r8+104] sbb r11, QWORD PTR [r8+112] sbb r12, QWORD PTR [r8+120] mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r10 mov QWORD PTR [rcx+112], r11 mov QWORD PTR [rcx+120], r12 sbb r13, 0 mov r11, QWORD PTR [r9] mov r12, QWORD PTR [r9+8] mov rax, QWORD PTR [rcx] mov r10, QWORD PTR [rcx+8] pext r11, r11, r13 pext r12, r12, r13 add rax, r11 adc r10, r12 mov QWORD PTR [rcx], rax mov QWORD PTR [rcx+8], r10 mov r11, QWORD PTR [r9+16] mov r12, QWORD PTR [r9+24] mov rax, QWORD PTR [rcx+16] mov r10, QWORD PTR [rcx+24] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+16], rax mov QWORD PTR [rcx+24], r10 mov r11, QWORD PTR [r9+32] mov r12, QWORD PTR [r9+40] mov rax, QWORD PTR [rcx+32] mov r10, QWORD PTR [rcx+40] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+32], rax mov QWORD PTR [rcx+40], r10 mov r11, QWORD PTR [r9+48] mov r12, QWORD PTR [r9+56] mov rax, QWORD PTR [rcx+48] mov r10, QWORD PTR [rcx+56] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+48], rax mov QWORD PTR [rcx+56], r10 mov r11, QWORD PTR [r9+64] mov r12, QWORD PTR [r9+72] mov rax, QWORD PTR [rcx+64] mov r10, QWORD PTR [rcx+72] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+64], rax mov QWORD PTR [rcx+72], r10 mov r11, QWORD PTR [r9+80] mov r12, QWORD PTR [r9+88] mov rax, QWORD PTR [rcx+80] mov r10, QWORD PTR [rcx+88] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+80], rax mov QWORD PTR [rcx+88], r10 mov r11, QWORD PTR [r9+96] mov r12, QWORD PTR [r9+104] mov rax, QWORD PTR [rcx+96] mov r10, QWORD PTR [rcx+104] pext r11, r11, r13 pext r12, r12, r13 adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+96], rax mov QWORD PTR [rcx+104], r10 mov r11, QWORD PTR [r9+112] mov r12, QWORD PTR [r9+120] mov rax, QWORD PTR [rcx+112] mov r10, QWORD PTR [rcx+120] pext r11, r11, r13 pext r12, r12, r13 
adc rax, r11 adc r10, r12 mov QWORD PTR [rcx+112], rax mov QWORD PTR [rcx+120], r10 pop r13 pop r12 ret sp_1024_mont_sub_avx2_16 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA sp_1024_mont_div2_avx2_16 PROC push r12 push r13 mov r13, QWORD PTR [rdx] xor r12, r12 mov r10, r13 and r13, 1 neg r13 mov rax, QWORD PTR [r8] mov r9, QWORD PTR [r8+8] mov r10, QWORD PTR [rdx] mov r11, QWORD PTR [rdx+8] pext rax, rax, r13 pext r9, r9, r13 add r10, rax adc r11, r9 mov QWORD PTR [rcx], r10 mov QWORD PTR [rcx+8], r11 mov rax, QWORD PTR [r8+16] mov r9, QWORD PTR [r8+24] mov r10, QWORD PTR [rdx+16] mov r11, QWORD PTR [rdx+24] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+16], r10 mov QWORD PTR [rcx+24], r11 mov rax, QWORD PTR [r8+32] mov r9, QWORD PTR [r8+40] mov r10, QWORD PTR [rdx+32] mov r11, QWORD PTR [rdx+40] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+32], r10 mov QWORD PTR [rcx+40], r11 mov rax, QWORD PTR [r8+48] mov r9, QWORD PTR [r8+56] mov r10, QWORD PTR [rdx+48] mov r11, QWORD PTR [rdx+56] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+48], r10 mov QWORD PTR [rcx+56], r11 mov rax, QWORD PTR [r8+64] mov r9, QWORD PTR [r8+72] mov r10, QWORD PTR [rdx+64] mov r11, QWORD PTR [rdx+72] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+64], r10 mov QWORD PTR [rcx+72], r11 mov rax, QWORD PTR [r8+80] mov r9, QWORD PTR [r8+88] mov r10, QWORD PTR [rdx+80] mov r11, QWORD PTR [rdx+88] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+80], r10 mov QWORD PTR [rcx+88], r11 mov rax, QWORD PTR [r8+96] mov r9, QWORD PTR [r8+104] mov r10, QWORD PTR [rdx+96] mov r11, QWORD PTR [rdx+104] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov 
QWORD PTR [rcx+96], r10 mov QWORD PTR [rcx+104], r11 mov rax, QWORD PTR [r8+112] mov r9, QWORD PTR [r8+120] mov r10, QWORD PTR [rdx+112] mov r11, QWORD PTR [rdx+120] pext rax, rax, r13 pext r9, r9, r13 adc r10, rax adc r11, r9 mov QWORD PTR [rcx+112], r10 mov QWORD PTR [rcx+120], r11 adc r12, 0 mov r10, QWORD PTR [rcx] mov r11, QWORD PTR [rcx+8] shrd r10, r11, 1 mov QWORD PTR [rcx], r10 mov r10, QWORD PTR [rcx+16] shrd r11, r10, 1 mov QWORD PTR [rcx+8], r11 mov r11, QWORD PTR [rcx+24] shrd r10, r11, 1 mov QWORD PTR [rcx+16], r10 mov r10, QWORD PTR [rcx+32] shrd r11, r10, 1 mov QWORD PTR [rcx+24], r11 mov r11, QWORD PTR [rcx+40] shrd r10, r11, 1 mov QWORD PTR [rcx+32], r10 mov r10, QWORD PTR [rcx+48] shrd r11, r10, 1 mov QWORD PTR [rcx+40], r11 mov r11, QWORD PTR [rcx+56] shrd r10, r11, 1 mov QWORD PTR [rcx+48], r10 mov r10, QWORD PTR [rcx+64] shrd r11, r10, 1 mov QWORD PTR [rcx+56], r11 mov r11, QWORD PTR [rcx+72] shrd r10, r11, 1 mov QWORD PTR [rcx+64], r10 mov r10, QWORD PTR [rcx+80] shrd r11, r10, 1 mov QWORD PTR [rcx+72], r11 mov r11, QWORD PTR [rcx+88] shrd r10, r11, 1 mov QWORD PTR [rcx+80], r10 mov r10, QWORD PTR [rcx+96] shrd r11, r10, 1 mov QWORD PTR [rcx+88], r11 mov r11, QWORD PTR [rcx+104] shrd r10, r11, 1 mov QWORD PTR [rcx+96], r10 mov r10, QWORD PTR [rcx+112] shrd r11, r10, 1 mov QWORD PTR [rcx+104], r11 mov r11, QWORD PTR [rcx+120] shrd r10, r11, 1 mov QWORD PTR [rcx+112], r10 shrd r11, r12, 1 mov QWORD PTR [rcx+120], r11 pop r13 pop r12 ret sp_1024_mont_div2_avx2_16 ENDP _text ENDS ENDIF ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. ; * ; * r A single precision integer. ; * size Maximum number of bytes to convert ; * a Byte array. ; * n Number of bytes in array to read. 
; */
; Register roles, as used by the code below:
;   rcx = write cursor into r          r8  = a (start of the byte array)
;   r9  = bytes still to convert       r11 = read cursor, starts at a + n
;   r12 = r + 128 (end of the output)  r13 = constant zero
; The second argument (rdx) is not referenced by this routine.
_text SEGMENT READONLY PARA
sp_1024_from_bin_bswap PROC
        push    r12
        push    r13
        lea     r11, [r8+r9]
        lea     r12, [rcx+128]
        xor     r13, r13
        jmp     L_1024_from_bin_bswap_64_end

        ; Convert 64 bytes per pass, walking the input from its end.
L_1024_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_1024_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_1024_from_bin_bswap_64_start
        jmp     L_1024_from_bin_bswap_8_end

        ; Convert the remaining whole 8-byte groups.
L_1024_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_1024_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_1024_from_bin_bswap_8_start

        ; Fewer than 8 bytes left: they are the leading bytes of a, so
        ; accumulate them from r8 upwards into one partial word.
        test    r9, r9
        jz      L_1024_from_bin_bswap_hi_end
        mov     r10, r13
        mov     rax, r13
L_1024_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_1024_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_1024_from_bin_bswap_hi_end:
        jmp     L_1024_from_bin_bswap_zero_end

        ; Zero every output word that was not written above.
L_1024_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
L_1024_from_bin_bswap_zero_end:
        cmp     rcx, r12
        jl      L_1024_from_bin_bswap_zero_start
        pop     r13
        pop     r12
        ret
sp_1024_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; */
; Register roles, as used by the code below:
;   rcx = write cursor into r          r8  = a (start of the byte array)
;   r9  = bytes still to convert       r11 = read cursor, starts at a + n
;   r12 = r + 128 (end of the output)
; The second argument (rdx) is not referenced by this routine.
_text SEGMENT READONLY PARA
sp_1024_from_bin_movbe PROC
        push    r12
        lea     r11, [r8+r9]
        lea     r12, [rcx+128]
        jmp     L_1024_from_bin_movbe_64_end

        ; Convert 64 bytes per pass, walking the input from its end.
        ; movbe loads and byte-swaps in one instruction.
L_1024_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_1024_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_1024_from_bin_movbe_64_start
        jmp     L_1024_from_bin_movbe_8_end

        ; Convert the remaining whole 8-byte groups.
L_1024_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_1024_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_1024_from_bin_movbe_8_start

        ; Fewer than 8 bytes left: they are the leading bytes of a, so
        ; accumulate them from r8 upwards into one partial word.
        test    r9, r9
        jz      L_1024_from_bin_movbe_hi_end
        xor     r10d, r10d
        xor     eax, eax
L_1024_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_1024_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_1024_from_bin_movbe_hi_end:
        jmp     L_1024_from_bin_movbe_zero_end

        ; Zero every output word that was not written above.
L_1024_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
L_1024_from_bin_movbe_zero_end:
        cmp     rcx, r12
        jl      L_1024_from_bin_movbe_zero_start
        pop     r12
        ret
sp_1024_from_bin_movbe ENDP
_text ENDS
ENDIF
ENDIF
END