lokinet/crypto/curve25519/sandy2x/fe51_mul.S
2018-10-23 07:29:37 -04:00

198 lines
3.1 KiB
ArmAsm

#ifdef IN_SANDY2X
/*
This file is basically amd64-51/fe25519_mul.s.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_mul
ASM_HIDE_SYMBOL _fe51_mul
#endif
.globl fe51_mul
.globl _fe51_mul
#ifdef __ELF__
.type fe51_mul, @function
.type _fe51_mul, @function
#endif
fe51_mul:
_fe51_mul:
mov %rsp,%r11
and $31,%r11
add $96,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)
mov %rdx,%rcx
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,64(%rsp)
mulq 16(%rcx)
mov %rax,%r8
mov %rdx,%r9
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,72(%rsp)
mulq 8(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 0(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 8(%rcx)
mov %rax,%r10
mov %rdx,%r11
movq 0(%rsi),%rax
mulq 16(%rcx)
mov %rax,%r12
mov %rdx,%r13
movq 0(%rsi),%rax
mulq 24(%rcx)
mov %rax,%r14
mov %rdx,%r15
movq 0(%rsi),%rax
mulq 32(%rcx)
mov %rax,%rbx
mov %rdx,%rbp
movq 8(%rsi),%rax
mulq 0(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsi),%rax
mulq 8(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 8(%rsi),%rax
mulq 16(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsi),%rax
mulq 24(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 8(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rax
mulq 0(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 16(%rsi),%rax
mulq 8(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 16(%rsi),%rax
mulq 16(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 24(%rsi),%rax
mulq 0(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 24(%rsi),%rax
mulq 8(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 64(%rsp),%rax
mulq 24(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 64(%rsp),%rax
mulq 32(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 32(%rsi),%rax
mulq 0(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 72(%rsp),%rax
mulq 16(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 72(%rsp),%rax
mulq 24(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 72(%rsp),%rax
mulq 32(%rcx)
add %rax,%r14
adc %rdx,%r15
movq REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and %rsi,%r8
shld $13,%r10,%r11
and %rsi,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rsi,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rsi,%r14
add %r13,%r14
shld $13,%rbx,%rbp
and %rsi,%rbx
add %r15,%rbx
imulq $19,%rbp,%rdx
add %rdx,%r8
mov %r8,%rdx
shr $51,%rdx
add %r10,%rdx
mov %rdx,%rcx
shr $51,%rdx
and %rsi,%r8
add %r12,%rdx
mov %rdx,%r9
shr $51,%rdx
and %rsi,%rcx
add %r14,%rdx
mov %rdx,%rax
shr $51,%rdx
and %rsi,%r9
add %rbx,%rdx
mov %rdx,%r10
shr $51,%rdx
and %rsi,%rax
imulq $19,%rdx,%rdx
add %rdx,%r8
and %rsi,%r10
movq %r8,0(%rdi)
movq %rcx,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
#endif