mirror of https://github.com/Qortal/Brooklyn
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1273 lines
24 KiB
1273 lines
24 KiB
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
# project.
# ====================================================================

# Poly1305 hash for MIPS.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#			IALU/gcc
# R1x000	~5.5/+130%	(big-endian)
# Octeon II	2.50/+70%	(little-endian)
#
# March 2019
#
# Add 32-bit code path.
#
# October 2019
#
# Modulo-scheduling reduction allows to omit dependency chain at the
# end of inner loop and improve performance. Also optimize MIPS32R2
# code path for MIPS 1004K core. Per René von Dorst's suggestions.
#
#			IALU/gcc
# R1x000	~9.8/?		(big-endian)
# Octeon II	3.65/+140%	(little-endian)
# MT7621/1004K	4.75/?		(little-endian)
#
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
# Symbolic register names mapped to hardware register numbers; the
# generated assembly interpolates these Perl variables.
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
#   excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <[email protected]>
#
######################################################################

$flavour = shift || "64";	# supported flavours are o32,n32,64,nubi32,nubi64

# Register that carries the return value: $a0 under NUBI, $t0 otherwise.
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
|
|
|
if ($flavour =~ /64|n32/i) {{{
######################################################################
# 64-bit code path
#

my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);

# poly1305_init(ctx, key): zero the 3-limb hash at ctx[0..16], then (if
# key is non-NULL) load the 16-byte key little-endian, clamp r per the
# Poly1305 spec, and store r0/r1/s1 at ctx[24/32/40].
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
     defined(_MIPS_ARCH_MIPS64R6)) \\
     && !defined(_MIPS_ARCH_MIPS64R2)
# define _MIPS_ARCH_MIPS64R2
#endif

#if defined(_MIPS_ARCH_MIPS64R6)
# define dmultu(rs,rt)
# define mflo(rd,rs,rt)	dmulu	rd,rs,rt
# define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
#else
# define dmultu(rs,rt)	dmultu	rs,rt
# define mflo(rd,rs,rt)	mflo	rd
# define mfhi(rd,rs,rt)	mfhi	rd
#endif

#ifdef	__KERNEL__
# define poly1305_init   poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit   poly1305_emit_mips
#endif

#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif

#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$sp,0,$ra
	.set	reorder

	sd	$zero,0($ctx)
	sd	$zero,8($ctx)
	sd	$zero,16($ctx)

	beqz	$inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS64R6)
	andi	$tmp0,$inp,7		# $inp % 8
	dsubu	$inp,$inp,$tmp0		# align $inp
	sll	$tmp0,$tmp0,3		# byte to bit offset
	ld	$in0,0($inp)
	ld	$in1,8($inp)
	beqz	$tmp0,.Laligned_key
	ld	$tmp2,16($inp)

	subu	$tmp1,$zero,$tmp0
# ifdef	MIPSEB
	dsllv	$in0,$in0,$tmp0
	dsrlv	$tmp3,$in1,$tmp1
	dsllv	$in1,$in1,$tmp0
	dsrlv	$tmp2,$tmp2,$tmp1
# else
	dsrlv	$in0,$in0,$tmp0
	dsllv	$tmp3,$in1,$tmp1
	dsrlv	$in1,$in1,$tmp0
	dsllv	$tmp2,$tmp2,$tmp1
# endif
	or	$in0,$in0,$tmp3
	or	$in1,$in1,$tmp2
.Laligned_key:
#else
	ldl	$in0,0+MSB($inp)
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	dsbh	$in1,$in1
	dshd	$in0,$in0
	dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	dsll	$tmp3,24
	and	$tmp2,$tmp0
	and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	dsrl	$in1,8
	dsll	$tmp2,8
	dsll	$tmp4,8
	and	$in0,$tmp0
	and	$in1,$tmp0
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	or	$in0,$tmp1
	or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	dsrl	$tmp3,$in1,32
	dsll	$in0,32
	dsll	$in1,32
	or	$in0,$tmp1
	or	$in1,$tmp3
# endif
#endif
	li	$tmp0,1
	dsll	$tmp0,32		# 0x0000000100000000
	daddiu	$tmp0,-63		# 0x00000000ffffffc1
	dsll	$tmp0,28		# 0x0ffffffc10000000
	daddiu	$tmp0,-1		# 0x0ffffffc0fffffff

	and	$in0,$tmp0
	daddiu	$tmp0,-3		# 0x0ffffffc0ffffffc
	and	$in1,$tmp0

	sd	$in0,24($ctx)
	dsrl	$tmp0,$in1,2
	sd	$in1,32($ctx)
	daddu	$tmp0,$in1		# s1 = r1 + (r1 >> 2)
	sd	$tmp0,40($ctx)

.Lno_key:
	li	$v0,0			# return 0
	jr	$ra
.end	poly1305_init
___
|
{
# Callee-saved register mask for the .mask directive below; NUBI saves
# $s0-$s5 ($12-$17), other ABIs only $s4/$s5 ($16/$17).
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";

my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
my ($shr,$shl) = ($s6,$s7);		# used on R6

# poly1305_blocks(ctx, inp, len, padbit): absorb len/16 complete blocks
# into the 2x64+2-bit hash at ctx[0..16] using key r0/r1/s1 at
# ctx[24..40], with a modulo-scheduled partial reduction per iteration.
$code.=<<___;
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$len,4			# number of complete blocks
	bnez	$len,poly1305_blocks_internal
	nop
	jr	$ra
	nop
.end	poly1305_blocks

.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.set	noreorder
#if defined(_MIPS_ARCH_MIPS64R6)
	.frame	$sp,8*8,$ra
	.mask	$SAVED_REGS_MASK|0x000c0000,-8
	dsubu	$sp,8*8
	sd	$s7,56($sp)
	sd	$s6,48($sp)
#else
	.frame	$sp,6*8,$ra
	.mask	$SAVED_REGS_MASK,-8
	dsubu	$sp,6*8
#endif
	sd	$s5,40($sp)
	sd	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	sd	$s3,24($sp)
	sd	$s2,16($sp)
	sd	$s1,8($sp)
	sd	$s0,0($sp)
___
$code.=<<___;
	.set	reorder

#if defined(_MIPS_ARCH_MIPS64R6)
	andi	$shr,$inp,7
	dsubu	$inp,$inp,$shr		# align $inp
	sll	$shr,$shr,3		# byte to bit offset
	subu	$shl,$zero,$shr
#endif

	ld	$h0,0($ctx)		# load hash value
	ld	$h1,8($ctx)
	ld	$h2,16($ctx)

	ld	$r0,24($ctx)		# load key
	ld	$r1,32($ctx)
	ld	$rs1,40($ctx)

	dsll	$len,4
	daddu	$len,$inp		# end of buffer
	b	.Loop

.align	4
.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$in0,0($inp)		# load input
	ld	$in1,8($inp)
	beqz	$shr,.Laligned_inp

	ld	$tmp2,16($inp)
# ifdef	MIPSEB
	dsllv	$in0,$in0,$shr
	dsrlv	$tmp3,$in1,$shl
	dsllv	$in1,$in1,$shr
	dsrlv	$tmp2,$tmp2,$shl
# else
	dsrlv	$in0,$in0,$shr
	dsllv	$tmp3,$in1,$shl
	dsrlv	$in1,$in1,$shr
	dsllv	$tmp2,$tmp2,$shl
# endif
	or	$in0,$in0,$tmp3
	or	$in1,$in1,$tmp2
.Laligned_inp:
#else
	ldl	$in0,0+MSB($inp)	# load input
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
	daddiu	$inp,16
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	dsbh	$in1,$in1
	dshd	$in0,$in0
	dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	dsll	$tmp3,24
	and	$tmp2,$tmp0
	and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	dsrl	$in1,8
	dsll	$tmp2,8
	dsll	$tmp4,8
	and	$in0,$tmp0
	and	$in1,$tmp0
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	or	$in0,$tmp1
	or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	dsrl	$tmp3,$in1,32
	dsll	$in0,32
	dsll	$in1,32
	or	$in0,$tmp1
	or	$in1,$tmp3
# endif
#endif
	dsrl	$tmp1,$h2,2		# modulo-scheduled reduction
	andi	$h2,$h2,3
	dsll	$tmp0,$tmp1,2

	daddu	$d0,$h0,$in0		# accumulate input
	daddu	$tmp1,$tmp0
	sltu	$tmp0,$d0,$h0
	daddu	$d0,$d0,$tmp1		# ... and residue
	sltu	$tmp1,$d0,$tmp1
	daddu	$d1,$h1,$in1
	daddu	$tmp0,$tmp1
	sltu	$tmp1,$d1,$h1
	daddu	$d1,$tmp0

	dmultu	($r0,$d0)		# h0*r0
	daddu	$d2,$h2,$padbit
	sltu	$tmp0,$d1,$tmp0
	mflo	($h0,$r0,$d0)
	mfhi	($h1,$r0,$d0)

	dmultu	($rs1,$d1)		# h1*5*r1
	daddu	$d2,$tmp1
	daddu	$d2,$tmp0
	mflo	($tmp0,$rs1,$d1)
	mfhi	($tmp1,$rs1,$d1)

	dmultu	($r1,$d0)		# h0*r1
	mflo	($tmp2,$r1,$d0)
	mfhi	($h2,$r1,$d0)
	daddu	$h0,$tmp0
	daddu	$h1,$tmp1
	sltu	$tmp0,$h0,$tmp0

	dmultu	($r0,$d1)		# h1*r0
	daddu	$h1,$tmp0
	daddu	$h1,$tmp2
	mflo	($tmp0,$r0,$d1)
	mfhi	($tmp1,$r0,$d1)

	dmultu	($rs1,$d2)		# h2*5*r1
	sltu	$tmp2,$h1,$tmp2
	daddu	$h2,$tmp2
	mflo	($tmp2,$rs1,$d2)

	dmultu	($r0,$d2)		# h2*r0
	daddu	$h1,$tmp0
	daddu	$h2,$tmp1
	mflo	($tmp3,$r0,$d2)
	sltu	$tmp0,$h1,$tmp0
	daddu	$h2,$tmp0

	daddu	$h1,$tmp2
	sltu	$tmp2,$h1,$tmp2
	daddu	$h2,$tmp2
	daddu	$h2,$tmp3

	bne	$inp,$len,.Loop

	sd	$h0,0($ctx)		# store hash value
	sd	$h1,8($ctx)
	sd	$h2,16($ctx)

	.set	noreorder
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$s7,56($sp)
	ld	$s6,48($sp)
#endif
	ld	$s5,40($sp)		# epilogue
	ld	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
	ld	$s3,24($sp)
	ld	$s2,16($sp)
	ld	$s1,8($sp)
	ld	$s0,0($sp)
___
$code.=<<___;
	jr	$ra
#if defined(_MIPS_ARCH_MIPS64R6)
	daddu	$sp,8*8
#else
	daddu	$sp,6*8
#endif
.end	poly1305_blocks_internal
___
}
|
{
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);

# poly1305_emit(ctx, mac, nonce): perform the final reduction mod
# 2^130-5, add the 128-bit nonce and write the 16-byte tag to mac,
# byte by byte so mac may be unaligned.
$code.=<<___;
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$sp,0,$ra
	.set	reorder

	ld	$tmp2,16($ctx)
	ld	$tmp0,0($ctx)
	ld	$tmp1,8($ctx)

	li	$in0,-4			# final reduction
	dsrl	$in1,$tmp2,2
	and	$in0,$tmp2
	andi	$tmp2,$tmp2,3
	daddu	$in0,$in1

	daddu	$tmp0,$tmp0,$in0
	sltu	$in1,$tmp0,$in0
	daddiu	$in0,$tmp0,5		# compare to modulus
	daddu	$tmp1,$tmp1,$in1
	sltiu	$tmp3,$in0,5
	sltu	$tmp4,$tmp1,$in1
	daddu	$in1,$tmp1,$tmp3
	daddu	$tmp2,$tmp2,$tmp4
	sltu	$tmp3,$in1,$tmp3
	daddu	$tmp2,$tmp2,$tmp3

	dsrl	$tmp2,2			# see if it carried/borrowed
	dsubu	$tmp2,$zero,$tmp2

	xor	$in0,$tmp0
	xor	$in1,$tmp1
	and	$in0,$tmp2
	and	$in1,$tmp2
	xor	$in0,$tmp0
	xor	$in1,$tmp1

	lwu	$tmp0,0($nonce)		# load nonce
	lwu	$tmp1,4($nonce)
	lwu	$tmp2,8($nonce)
	lwu	$tmp3,12($nonce)
	dsll	$tmp1,32
	dsll	$tmp3,32
	or	$tmp0,$tmp1
	or	$tmp2,$tmp3

	daddu	$in0,$tmp0		# accumulate nonce
	daddu	$in1,$tmp2
	sltu	$tmp0,$in0,$tmp0
	daddu	$in1,$tmp0

	dsrl	$tmp0,$in0,8		# write mac value
	dsrl	$tmp1,$in0,16
	dsrl	$tmp2,$in0,24
	sb	$in0,0($mac)
	dsrl	$tmp3,$in0,32
	sb	$tmp0,1($mac)
	dsrl	$tmp0,$in0,40
	sb	$tmp1,2($mac)
	dsrl	$tmp1,$in0,48
	sb	$tmp2,3($mac)
	dsrl	$tmp2,$in0,56
	sb	$tmp3,4($mac)
	dsrl	$tmp3,$in1,8
	sb	$tmp0,5($mac)
	dsrl	$tmp0,$in1,16
	sb	$tmp1,6($mac)
	dsrl	$tmp1,$in1,24
	sb	$tmp2,7($mac)

	sb	$in1,8($mac)
	dsrl	$tmp2,$in1,32
	sb	$tmp3,9($mac)
	dsrl	$tmp3,$in1,40
	sb	$tmp0,10($mac)
	dsrl	$tmp0,$in1,48
	sb	$tmp1,11($mac)
	dsrl	$tmp1,$in1,56
	sb	$tmp2,12($mac)
	sb	$tmp3,13($mac)
	sb	$tmp0,14($mac)
	sb	$tmp1,15($mac)

	jr	$ra
.end	poly1305_emit
.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
.align	2
___
}
|
}}} else {{{
######################################################################
# 32-bit code path
#

my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
   ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);

# poly1305_init(ctx, key): zero the 5-limb hash at ctx[0..16], then (if
# key is non-NULL) load the 16-byte key little-endian, clamp r, and
# store r0..r3 at ctx[20..32] and s1..s3 (rN + rN>>2) at ctx[36..44].
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
     defined(_MIPS_ARCH_MIPS32R6)) \\
     && !defined(_MIPS_ARCH_MIPS32R2)
# define _MIPS_ARCH_MIPS32R2
#endif

#if defined(_MIPS_ARCH_MIPS32R6)
# define multu(rs,rt)
# define mflo(rd,rs,rt)	mulu	rd,rs,rt
# define mfhi(rd,rs,rt)	muhu	rd,rs,rt
#else
# define multu(rs,rt)	multu	rs,rt
# define mflo(rd,rs,rt)	mflo	rd
# define mfhi(rd,rs,rt)	mfhi	rd
#endif

#ifdef	__KERNEL__
# define poly1305_init   poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit   poly1305_emit_mips
#endif

#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif

#ifdef MIPSEB
# define MSB 0
# define LSB 3
#else
# define MSB 3
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$sp,0,$ra
	.set	reorder

	sw	$zero,0($ctx)
	sw	$zero,4($ctx)
	sw	$zero,8($ctx)
	sw	$zero,12($ctx)
	sw	$zero,16($ctx)

	beqz	$inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS32R6)
	andi	$tmp0,$inp,3		# $inp % 4
	subu	$inp,$inp,$tmp0		# align $inp
	sll	$tmp0,$tmp0,3		# byte to bit offset
	lw	$in0,0($inp)
	lw	$in1,4($inp)
	lw	$in2,8($inp)
	lw	$in3,12($inp)
	beqz	$tmp0,.Laligned_key

	lw	$tmp2,16($inp)
	subu	$tmp1,$zero,$tmp0
# ifdef	MIPSEB
	sllv	$in0,$in0,$tmp0
	srlv	$tmp3,$in1,$tmp1
	sllv	$in1,$in1,$tmp0
	or	$in0,$in0,$tmp3
	srlv	$tmp3,$in2,$tmp1
	sllv	$in2,$in2,$tmp0
	or	$in1,$in1,$tmp3
	srlv	$tmp3,$in3,$tmp1
	sllv	$in3,$in3,$tmp0
	or	$in2,$in2,$tmp3
	srlv	$tmp2,$tmp2,$tmp1
	or	$in3,$in3,$tmp2
# else
	srlv	$in0,$in0,$tmp0
	sllv	$tmp3,$in1,$tmp1
	srlv	$in1,$in1,$tmp0
	or	$in0,$in0,$tmp3
	sllv	$tmp3,$in2,$tmp1
	srlv	$in2,$in2,$tmp0
	or	$in1,$in1,$tmp3
	sllv	$tmp3,$in3,$tmp1
	srlv	$in3,$in3,$tmp0
	or	$in2,$in2,$tmp3
	sllv	$tmp2,$tmp2,$tmp1
	or	$in3,$in3,$tmp2
# endif
.Laligned_key:
#else
	lwl	$in0,0+MSB($inp)
	lwl	$in1,4+MSB($inp)
	lwl	$in2,8+MSB($inp)
	lwl	$in3,12+MSB($inp)
	lwr	$in0,0+LSB($inp)
	lwr	$in1,4+LSB($inp)
	lwr	$in2,8+LSB($inp)
	lwr	$in3,12+LSB($inp)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS32R2)
	wsbh	$in0,$in0		# byte swap
	wsbh	$in1,$in1
	wsbh	$in2,$in2
	wsbh	$in3,$in3
	rotr	$in0,$in0,16
	rotr	$in1,$in1,16
	rotr	$in2,$in2,16
	rotr	$in3,$in3,16
# else
	srl	$tmp0,$in0,24		# byte swap
	srl	$tmp1,$in0,8
	andi	$tmp2,$in0,0xFF00
	sll	$in0,$in0,24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	$in0,$tmp0
	srl	$tmp0,$in1,24
	or	$tmp1,$tmp2
	srl	$tmp2,$in1,8
	or	$in0,$tmp1
	andi	$tmp1,$in1,0xFF00
	sll	$in1,$in1,24
	andi	$tmp2,0xFF00
	sll	$tmp1,$tmp1,8
	or	$in1,$tmp0
	srl	$tmp0,$in2,24
	or	$tmp2,$tmp1
	srl	$tmp1,$in2,8
	or	$in1,$tmp2
	andi	$tmp2,$in2,0xFF00
	sll	$in2,$in2,24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	$in2,$tmp0
	srl	$tmp0,$in3,24
	or	$tmp1,$tmp2
	srl	$tmp2,$in3,8
	or	$in2,$tmp1
	andi	$tmp1,$in3,0xFF00
	sll	$in3,$in3,24
	andi	$tmp2,0xFF00
	sll	$tmp1,$tmp1,8
	or	$in3,$tmp0
	or	$tmp2,$tmp1
	or	$in3,$tmp2
# endif
#endif
	lui	$tmp0,0x0fff
	ori	$tmp0,0xffff		# 0x0fffffff
	and	$in0,$in0,$tmp0
	subu	$tmp0,3			# 0x0ffffffc
	and	$in1,$in1,$tmp0
	and	$in2,$in2,$tmp0
	and	$in3,$in3,$tmp0

	sw	$in0,20($ctx)
	sw	$in1,24($ctx)
	sw	$in2,28($ctx)
	sw	$in3,32($ctx)

	srl	$tmp1,$in1,2
	srl	$tmp2,$in2,2
	srl	$tmp3,$in3,2
	addu	$in1,$in1,$tmp1		# s1 = r1 + (r1 >> 2)
	addu	$in2,$in2,$tmp2
	addu	$in3,$in3,$tmp3
	sw	$in1,36($ctx)
	sw	$in2,40($ctx)
	sw	$in3,44($ctx)
.Lno_key:
	li	$v0,0
	jr	$ra
.end	poly1305_init
___
|
{
# Callee-saved register mask for the .mask directive below; NUBI saves
# $s0-$s11 ($12-$23), other ABIs only $s4-$s11 ($16-$23).
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";

my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
   ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
my ($d0,$d1,$d2,$d3) =
   ($a4,$a5,$a6,$a7);
my $shr = $t2;		# used on R6
my $one = $t2;		# used on R2

# poly1305_blocks(ctx, inp, len, padbit): absorb len/16 complete blocks
# into the 4x32+2-bit hash at ctx[0..16] using key r0..r3/s1..s3 at
# ctx[20..44]; R2 (non-R6) cores use the multiply-accumulate path.
$code.=<<___;
.globl	poly1305_blocks
.align	5
.ent	poly1305_blocks
poly1305_blocks:
	.frame	$sp,16*4,$ra
	.mask	$SAVED_REGS_MASK,-4
	.set	noreorder
	subu	$sp, $sp,4*12
	sw	$s11,4*11($sp)
	sw	$s10,4*10($sp)
	sw	$s9, 4*9($sp)
	sw	$s8, 4*8($sp)
	sw	$s7, 4*7($sp)
	sw	$s6, 4*6($sp)
	sw	$s5, 4*5($sp)
	sw	$s4, 4*4($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	sw	$s3, 4*3($sp)
	sw	$s2, 4*2($sp)
	sw	$s1, 4*1($sp)
	sw	$s0, 4*0($sp)
___
$code.=<<___;
	.set	reorder

	srl	$len,4			# number of complete blocks
	li	$one,1
	beqz	$len,.Labort

#if defined(_MIPS_ARCH_MIPS32R6)
	andi	$shr,$inp,3
	subu	$inp,$inp,$shr		# align $inp
	sll	$shr,$shr,3		# byte to bit offset
#endif

	lw	$h0,0($ctx)		# load hash value
	lw	$h1,4($ctx)
	lw	$h2,8($ctx)
	lw	$h3,12($ctx)
	lw	$h4,16($ctx)

	lw	$r0,20($ctx)		# load key
	lw	$r1,24($ctx)
	lw	$r2,28($ctx)
	lw	$r3,32($ctx)
	lw	$rs1,36($ctx)
	lw	$rs2,40($ctx)
	lw	$rs3,44($ctx)

	sll	$len,4
	addu	$len,$len,$inp		# end of buffer
	b	.Loop

.align	4
.Loop:
#if defined(_MIPS_ARCH_MIPS32R6)
	lw	$d0,0($inp)		# load input
	lw	$d1,4($inp)
	lw	$d2,8($inp)
	lw	$d3,12($inp)
	beqz	$shr,.Laligned_inp

	lw	$t0,16($inp)
	subu	$t1,$zero,$shr
# ifdef	MIPSEB
	sllv	$d0,$d0,$shr
	srlv	$at,$d1,$t1
	sllv	$d1,$d1,$shr
	or	$d0,$d0,$at
	srlv	$at,$d2,$t1
	sllv	$d2,$d2,$shr
	or	$d1,$d1,$at
	srlv	$at,$d3,$t1
	sllv	$d3,$d3,$shr
	or	$d2,$d2,$at
	srlv	$t0,$t0,$t1
	or	$d3,$d3,$t0
# else
	srlv	$d0,$d0,$shr
	sllv	$at,$d1,$t1
	srlv	$d1,$d1,$shr
	or	$d0,$d0,$at
	sllv	$at,$d2,$t1
	srlv	$d2,$d2,$shr
	or	$d1,$d1,$at
	sllv	$at,$d3,$t1
	srlv	$d3,$d3,$shr
	or	$d2,$d2,$at
	sllv	$t0,$t0,$t1
	or	$d3,$d3,$t0
# endif
.Laligned_inp:
#else
	lwl	$d0,0+MSB($inp)		# load input
	lwl	$d1,4+MSB($inp)
	lwl	$d2,8+MSB($inp)
	lwl	$d3,12+MSB($inp)
	lwr	$d0,0+LSB($inp)
	lwr	$d1,4+LSB($inp)
	lwr	$d2,8+LSB($inp)
	lwr	$d3,12+LSB($inp)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS32R2)
	wsbh	$d0,$d0			# byte swap
	wsbh	$d1,$d1
	wsbh	$d2,$d2
	wsbh	$d3,$d3
	rotr	$d0,$d0,16
	rotr	$d1,$d1,16
	rotr	$d2,$d2,16
	rotr	$d3,$d3,16
# else
	srl	$at,$d0,24		# byte swap
	srl	$t0,$d0,8
	andi	$t1,$d0,0xFF00
	sll	$d0,$d0,24
	andi	$t0,0xFF00
	sll	$t1,$t1,8
	or	$d0,$at
	srl	$at,$d1,24
	or	$t0,$t1
	srl	$t1,$d1,8
	or	$d0,$t0
	andi	$t0,$d1,0xFF00
	sll	$d1,$d1,24
	andi	$t1,0xFF00
	sll	$t0,$t0,8
	or	$d1,$at
	srl	$at,$d2,24
	or	$t1,$t0
	srl	$t0,$d2,8
	or	$d1,$t1
	andi	$t1,$d2,0xFF00
	sll	$d2,$d2,24
	andi	$t0,0xFF00
	sll	$t1,$t1,8
	or	$d2,$at
	srl	$at,$d3,24
	or	$t0,$t1
	srl	$t1,$d3,8
	or	$d2,$t0
	andi	$t0,$d3,0xFF00
	sll	$d3,$d3,24
	andi	$t1,0xFF00
	sll	$t0,$t0,8
	or	$d3,$at
	or	$t1,$t0
	or	$d3,$t1
# endif
#endif
	srl	$t0,$h4,2		# modulo-scheduled reduction
	andi	$h4,$h4,3
	sll	$at,$t0,2

	addu	$d0,$d0,$h0		# accumulate input
	addu	$t0,$t0,$at
	sltu	$h0,$d0,$h0
	addu	$d0,$d0,$t0		# ... and residue
	sltu	$at,$d0,$t0

	addu	$d1,$d1,$h1
	addu	$h0,$h0,$at		# carry
	sltu	$h1,$d1,$h1
	addu	$d1,$d1,$h0
	sltu	$h0,$d1,$h0

	addu	$d2,$d2,$h2
	addu	$h1,$h1,$h0		# carry
	sltu	$h2,$d2,$h2
	addu	$d2,$d2,$h1
	sltu	$h1,$d2,$h1

	addu	$d3,$d3,$h3
	addu	$h2,$h2,$h1		# carry
	sltu	$h3,$d3,$h3
	addu	$d3,$d3,$h2

#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
	multu	$r0,$d0			# d0*r0
	sltu	$h2,$d3,$h2
	maddu	$rs3,$d1		# d1*s3
	addu	$h3,$h3,$h2		# carry
	maddu	$rs2,$d2		# d2*s2
	addu	$h4,$h4,$padbit
	maddu	$rs1,$d3		# d3*s1
	addu	$h4,$h4,$h3
	mfhi	$at
	mflo	$h0

	multu	$r1,$d0			# d0*r1
	maddu	$r0,$d1			# d1*r0
	maddu	$rs3,$d2		# d2*s3
	maddu	$rs2,$d3		# d3*s2
	maddu	$rs1,$h4		# h4*s1
	maddu	$at,$one		# hi*1
	mfhi	$at
	mflo	$h1

	multu	$r2,$d0			# d0*r2
	maddu	$r1,$d1			# d1*r1
	maddu	$r0,$d2			# d2*r0
	maddu	$rs3,$d3		# d3*s3
	maddu	$rs2,$h4		# h4*s2
	maddu	$at,$one		# hi*1
	mfhi	$at
	mflo	$h2

	mul	$t0,$r0,$h4		# h4*r0

	multu	$r3,$d0			# d0*r3
	maddu	$r2,$d1			# d1*r2
	maddu	$r1,$d2			# d2*r1
	maddu	$r0,$d3			# d3*r0
	maddu	$rs3,$h4		# h4*s3
	maddu	$at,$one		# hi*1
	mfhi	$at
	mflo	$h3

	addiu	$inp,$inp,16

	addu	$h4,$t0,$at
#else
	multu	($r0,$d0)		# d0*r0
	mflo	($h0,$r0,$d0)
	mfhi	($h1,$r0,$d0)

	sltu	$h2,$d3,$h2
	addu	$h3,$h3,$h2		# carry

	multu	($rs3,$d1)		# d1*s3
	mflo	($at,$rs3,$d1)
	mfhi	($t0,$rs3,$d1)

	addu	$h4,$h4,$padbit
	addiu	$inp,$inp,16
	addu	$h4,$h4,$h3

	multu	($rs2,$d2)		# d2*s2
	mflo	($a3,$rs2,$d2)
	mfhi	($t1,$rs2,$d2)
	addu	$h0,$h0,$at
	addu	$h1,$h1,$t0
	multu	($rs1,$d3)		# d3*s1
	sltu	$at,$h0,$at
	addu	$h1,$h1,$at

	mflo	($at,$rs1,$d3)
	mfhi	($t0,$rs1,$d3)
	addu	$h0,$h0,$a3
	addu	$h1,$h1,$t1
	multu	($r1,$d0)		# d0*r1
	sltu	$a3,$h0,$a3
	addu	$h1,$h1,$a3

	mflo	($a3,$r1,$d0)
	mfhi	($h2,$r1,$d0)
	addu	$h0,$h0,$at
	addu	$h1,$h1,$t0
	multu	($r0,$d1)		# d1*r0
	sltu	$at,$h0,$at
	addu	$h1,$h1,$at

	mflo	($at,$r0,$d1)
	mfhi	($t0,$r0,$d1)
	addu	$h1,$h1,$a3
	sltu	$a3,$h1,$a3
	multu	($rs3,$d2)		# d2*s3
	addu	$h2,$h2,$a3

	mflo	($a3,$rs3,$d2)
	mfhi	($t1,$rs3,$d2)
	addu	$h1,$h1,$at
	addu	$h2,$h2,$t0
	multu	($rs2,$d3)		# d3*s2
	sltu	$at,$h1,$at
	addu	$h2,$h2,$at

	mflo	($at,$rs2,$d3)
	mfhi	($t0,$rs2,$d3)
	addu	$h1,$h1,$a3
	addu	$h2,$h2,$t1
	multu	($rs1,$h4)		# h4*s1
	sltu	$a3,$h1,$a3
	addu	$h2,$h2,$a3

	mflo	($a3,$rs1,$h4)
	addu	$h1,$h1,$at
	addu	$h2,$h2,$t0
	multu	($r2,$d0)		# d0*r2
	sltu	$at,$h1,$at
	addu	$h2,$h2,$at

	mflo	($at,$r2,$d0)
	mfhi	($h3,$r2,$d0)
	addu	$h1,$h1,$a3
	sltu	$a3,$h1,$a3
	multu	($r1,$d1)		# d1*r1
	addu	$h2,$h2,$a3

	mflo	($a3,$r1,$d1)
	mfhi	($t1,$r1,$d1)
	addu	$h2,$h2,$at
	sltu	$at,$h2,$at
	multu	($r0,$d2)		# d2*r0
	addu	$h3,$h3,$at

	mflo	($at,$r0,$d2)
	mfhi	($t0,$r0,$d2)
	addu	$h2,$h2,$a3
	addu	$h3,$h3,$t1
	multu	($rs3,$d3)		# d3*s3
	sltu	$a3,$h2,$a3
	addu	$h3,$h3,$a3

	mflo	($a3,$rs3,$d3)
	mfhi	($t1,$rs3,$d3)
	addu	$h2,$h2,$at
	addu	$h3,$h3,$t0
	multu	($rs2,$h4)		# h4*s2
	sltu	$at,$h2,$at
	addu	$h3,$h3,$at

	mflo	($at,$rs2,$h4)
	addu	$h2,$h2,$a3
	addu	$h3,$h3,$t1
	multu	($r3,$d0)		# d0*r3
	sltu	$a3,$h2,$a3
	addu	$h3,$h3,$a3

	mflo	($a3,$r3,$d0)
	mfhi	($t1,$r3,$d0)
	addu	$h2,$h2,$at
	sltu	$at,$h2,$at
	multu	($r2,$d1)		# d1*r2
	addu	$h3,$h3,$at

	mflo	($at,$r2,$d1)
	mfhi	($t0,$r2,$d1)
	addu	$h3,$h3,$a3
	sltu	$a3,$h3,$a3
	multu	($r0,$d3)		# d3*r0
	addu	$t1,$t1,$a3

	mflo	($a3,$r0,$d3)
	mfhi	($d3,$r0,$d3)
	addu	$h3,$h3,$at
	addu	$t1,$t1,$t0
	multu	($r1,$d2)		# d2*r1
	sltu	$at,$h3,$at
	addu	$t1,$t1,$at

	mflo	($at,$r1,$d2)
	mfhi	($t0,$r1,$d2)
	addu	$h3,$h3,$a3
	addu	$t1,$t1,$d3
	multu	($rs3,$h4)		# h4*s3
	sltu	$a3,$h3,$a3
	addu	$t1,$t1,$a3

	mflo	($a3,$rs3,$h4)
	addu	$h3,$h3,$at
	addu	$t1,$t1,$t0
	multu	($r0,$h4)		# h4*r0
	sltu	$at,$h3,$at
	addu	$t1,$t1,$at

	mflo	($h4,$r0,$h4)
	addu	$h3,$h3,$a3
	sltu	$a3,$h3,$a3
	addu	$t1,$t1,$a3
	addu	$h4,$h4,$t1

	li	$padbit,1		# if we loop, padbit is 1
#endif
	bne	$inp,$len,.Loop

	sw	$h0,0($ctx)		# store hash value
	sw	$h1,4($ctx)
	sw	$h2,8($ctx)
	sw	$h3,12($ctx)
	sw	$h4,16($ctx)

	.set	noreorder
.Labort:
	lw	$s11,4*11($sp)
	lw	$s10,4*10($sp)
	lw	$s9, 4*9($sp)
	lw	$s8, 4*8($sp)
	lw	$s7, 4*7($sp)
	lw	$s6, 4*6($sp)
	lw	$s5, 4*5($sp)
	lw	$s4, 4*4($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	lw	$s3, 4*3($sp)
	lw	$s2, 4*2($sp)
	lw	$s1, 4*1($sp)
	lw	$s0, 4*0($sp)
___
$code.=<<___;
	jr	$ra
	addu	$sp,$sp,4*12
.end	poly1305_blocks
___
}
|
{
my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);

# poly1305_emit(ctx, mac, nonce): perform the final reduction mod
# 2^130-5, add the 128-bit nonce and write the 16-byte tag to mac,
# byte by byte so mac may be unaligned. $ctx is reused as scratch
# once the hash limbs have been loaded.
$code.=<<___;
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$sp,0,$ra
	.set	reorder

	lw	$tmp4,16($ctx)
	lw	$tmp0,0($ctx)
	lw	$tmp1,4($ctx)
	lw	$tmp2,8($ctx)
	lw	$tmp3,12($ctx)

	li	$in0,-4			# final reduction
	srl	$ctx,$tmp4,2
	and	$in0,$in0,$tmp4
	andi	$tmp4,$tmp4,3
	addu	$ctx,$ctx,$in0

	addu	$tmp0,$tmp0,$ctx
	sltu	$ctx,$tmp0,$ctx
	addiu	$in0,$tmp0,5		# compare to modulus
	addu	$tmp1,$tmp1,$ctx
	sltiu	$in1,$in0,5
	sltu	$ctx,$tmp1,$ctx
	addu	$in1,$in1,$tmp1
	addu	$tmp2,$tmp2,$ctx
	sltu	$in2,$in1,$tmp1
	sltu	$ctx,$tmp2,$ctx
	addu	$in2,$in2,$tmp2
	addu	$tmp3,$tmp3,$ctx
	sltu	$in3,$in2,$tmp2
	sltu	$ctx,$tmp3,$ctx
	addu	$in3,$in3,$tmp3
	addu	$tmp4,$tmp4,$ctx
	sltu	$ctx,$in3,$tmp3
	addu	$ctx,$tmp4

	srl	$ctx,2			# see if it carried/borrowed
	subu	$ctx,$zero,$ctx

	xor	$in0,$tmp0
	xor	$in1,$tmp1
	xor	$in2,$tmp2
	xor	$in3,$tmp3
	and	$in0,$ctx
	and	$in1,$ctx
	and	$in2,$ctx
	and	$in3,$ctx
	xor	$in0,$tmp0
	xor	$in1,$tmp1
	xor	$in2,$tmp2
	xor	$in3,$tmp3

	lw	$tmp0,0($nonce)		# load nonce
	lw	$tmp1,4($nonce)
	lw	$tmp2,8($nonce)
	lw	$tmp3,12($nonce)

	addu	$in0,$tmp0		# accumulate nonce
	sltu	$ctx,$in0,$tmp0

	addu	$in1,$tmp1
	sltu	$tmp1,$in1,$tmp1
	addu	$in1,$ctx
	sltu	$ctx,$in1,$ctx
	addu	$ctx,$tmp1

	addu	$in2,$tmp2
	sltu	$tmp2,$in2,$tmp2
	addu	$in2,$ctx
	sltu	$ctx,$in2,$ctx
	addu	$ctx,$tmp2

	addu	$in3,$tmp3
	addu	$in3,$ctx

	srl	$tmp0,$in0,8		# write mac value
	srl	$tmp1,$in0,16
	srl	$tmp2,$in0,24
	sb	$in0, 0($mac)
	sb	$tmp0,1($mac)
	srl	$tmp0,$in1,8
	sb	$tmp1,2($mac)
	srl	$tmp1,$in1,16
	sb	$tmp2,3($mac)
	srl	$tmp2,$in1,24
	sb	$in1, 4($mac)
	sb	$tmp0,5($mac)
	srl	$tmp0,$in2,8
	sb	$tmp1,6($mac)
	srl	$tmp1,$in2,16
	sb	$tmp2,7($mac)
	srl	$tmp2,$in2,24
	sb	$in2, 8($mac)
	sb	$tmp0,9($mac)
	srl	$tmp0,$in3,8
	sb	$tmp1,10($mac)
	srl	$tmp1,$in3,16
	sb	$tmp2,11($mac)
	srl	$tmp2,$in3,24
	sb	$in3, 12($mac)
	sb	$tmp0,13($mac)
	sb	$tmp1,14($mac)
	sb	$tmp2,15($mac)

	jr	$ra
.end	poly1305_emit
.rdata
.asciiz	"Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
.align	2
___
}
|
}}}

# Emit the accumulated assembly.  An optional trailing command-line
# argument names an output file; otherwise write to stdout.
if (my $output = pop @ARGV) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
close STDOUT or die "close failed: $!";
|
|
|