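/*
 * The lines below appear to be page text captured from the web viewer this
 * file was copied from; they are kept here as a comment only.
 *
 *   mirror of https://github.com/Qortal/Brooklyn
 *   You can not select more than 25 topics
 *   Topics must start with a letter or number, can include dashes ('-') and
 *   can be up to 35 characters long.
 *   223 lines, 4.7 KiB
 */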
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5		// 2^5 = 32-byte boundary

	/*
	 * Symbolic register names used by the macros in this file:
	 *
	 *   rk      pointer to the round keys; advanced with ldm writeback
	 *   rounds  round counter, decremented by do_crypt
	 *   in      pointer the 16-byte input block is loaded from
	 *   out     pointer the 16-byte output block is stored to
	 *   ttab    base address of the lookup table used by __load
	 */
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	/*
	 * Scratch registers. Note the aliasing: t1 is the same register as
	 * 'in' (r2), t2 is the same register as 'out' (r3), and t0 is lr.
	 * Any use of t0-t2 therefore destroys those values; do_crypt saves r3
	 * and lr on the stack on entry and reloads 'out' from there.
	 */
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

/*
 * __select - isolate byte number idx (0 = least significant) of a word.
 *
 *   out  destination register
 *   in   source register
 *   idx  byte index, 0..3
 *
 * The result differs by architecture level:
 *   - before ARMv7 the byte is only masked and stays at bit position
 *     8 * idx;
 *   - from ARMv7 on, ubfx extracts it into bits 7:0.
 * __load knows about this difference and shifts accordingly, so the two
 * macros must be used as a pair with the same idx.
 */
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

/*
 * __load - table lookup relative to ttab, indexed by a __select result.
 *
 *   out  destination register
 *   in   register holding the value produced by __select with the same idx
 *   idx  byte index that was passed to __select
 *   sz   log2 of the distance in bytes between consecutive table entries
 *   op   suffix appended to ldr (blank for a word load, b for a byte load)
 *
 * Before ARMv7, __select leaves the byte at bit position 8 * idx, so for
 * idx > 0 the index is shifted right by (8 * idx - sz), which moves the
 * byte down and scales it by 2^sz in a single step. For idx == 0, and on
 * ARMv7 and later where the byte is already in bits 7:0, the index is
 * simply shifted left by sz.
 */
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

/*
 * __hround - compute two of the four output words of one round.
 *
 *   out0, out1  destination registers
 *   in0..in3    the four input words
 *   t3, t4      two additional scratch registers supplied by the caller
 *   enc         non-zero selects the byte routing used by fround, zero the
 *               routing used by iround (only out1, t3 and t4 are affected)
 *   sz, op      forwarded to __load (index shift and ldr suffix)
 *   oldcpsr     optional; when given, restore_irqs is invoked on it after
 *               the last table load of this macro
 *
 * With T[] denoting a lookup through __load and bN byte N of a word:
 *
 *   out0 = T[in0.b0] ^ ror(T[in1.b1], 24) ^ ror(T[in2.b2], 16)
 *          ^ ror(T[in3.b3], 8) ^ key word 0
 *
 *   out1 is built the same way from
 *     enc != 0:  in1.b0, in2.b1, in3.b2, in0.b3
 *     enc == 0:  in3.b0, in0.b1, in1.b2, in2.b3
 *   and is XORed with key word 1.
 *
 * The two key words are read from rk with writeback, so rk advances by
 * 8 bytes per expansion.
 *
 * Clobbers t0, t1, t2, t3 and t4. t3 and t4 may be the very registers they
 * are selected from (fround and iround do this in their second half); the
 * statement order below ensures every other read of those inputs comes
 * first. Selects and loads are interleaved, presumably to hide load
 * latency, so the order should be kept as is.
 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	// t0 has been consumed above and is reused for the byte 3 lookup.
	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	// t1 and t2 are free from here on and receive the two key words.
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

/*
 * fround - one full round built from two __hround expansions with enc = 1.
 *
 *   out0..out3  registers receiving the four output words
 *   in0..in3    registers holding the four input words
 *   sz          index shift passed to __load (defaults to 2)
 *   op          optional ldr suffix passed to __load
 *   oldcpsr     optional; forwarded to the second half only, so that
 *               restore_irqs runs after the last table lookup of the round
 *
 * The first half produces out0/out1 and borrows out2/out3 as scratch. The
 * second half produces out2/out3 from the inputs rotated by two positions
 * and borrows in1/in2 as scratch, so in1 and in2 do not survive this macro.
 */
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

/*
 * iround - one full round built from two __hround expansions with enc = 0.
 *
 *   out0..out3  registers receiving the four output words
 *   in0..in3    registers holding the four input words
 *   sz          index shift passed to __load (defaults to 2)
 *   op          optional ldr suffix passed to __load
 *   oldcpsr     optional; forwarded to the second half only, so that
 *               restore_irqs runs after the last table lookup of the round
 *
 * Compared with fround, in1 and in3 swap places in the argument lists
 * handed to __hround. The first half borrows out2/out3 as scratch; the
 * second half borrows in1/in0, so in0 and in1 do not survive this macro.
 */
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

/*
 * __rev - reverse the byte order of a 32-bit word.
 *
 *   out  destination register
 *   in   source register (may be the same register as out)
 *
 * Before ARMv6 the swap is open-coded and clobbers t0, t1 and t2, which
 * are lr, r2 ('in') and r3 ('out'). From ARMv6 on a single rev instruction
 * is used and nothing else is touched.
 */
	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24			// byte 0 -> byte 3
	and		t1, \in, #0xff00		// byte 1, still in place
	and		t2, \in, #0xff0000		// byte 2, still in place
	orr		\out, t0, \in, lsr #24		// byte 3 -> byte 0
	orr		\out, \out, t1, lsl #8		// byte 1 -> byte 2
	orr		\out, \out, t2, lsr #8		// byte 2 -> byte 1
	.else
	rev		\out, \in
	.endif
	.endm

/*
 * __adrl - load the address of a symbol into a register.
 *
 *   out  destination register
 *   sym  symbol whose address is wanted
 *   c    optional condition suffix appended to the instruction(s)
 *
 * Before ARMv7 the address comes from a literal pool through the
 * 'ldr reg, =sym' pseudo-instruction, so a pool has to be emitted within
 * range (do_crypt ends with .ltorg). From ARMv7 on the address is built
 * with a movw/movt pair and no literal is needed.
 */
	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm

/*
 * do_crypt - function body shared by the encrypt and decrypt entry points.
 *
 *   round  name of the round macro to expand (fround or iround)
 *   ttab   symbol of the 1024-byte table used by every round but the last;
 *          not to be confused with the 'ttab' register alias, which
 *          receives its address
 *   ltab   optional symbol of a 256-byte table used by the last round;
 *          when left blank the last round reads from ttab + 1 instead
 *   bsz    index shift (log2 of the entry stride) for the last round's
 *          byte loads
 *
 * Registers on entry: rk, rounds, in and out as named by the aliases at the
 * top of the file. r4-r7 and r8-r11 alternately hold the state. r3-r11 and
 * lr are saved on entry and restored on exit; rk is advanced past every key
 * word that was consumed, and rounds and ip are overwritten.
 *
 * Interrupts are disabled from save_and_disable_irqs until the second half
 * of the last round invokes restore_irqs.
 */
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}		// r3 ('out') lands at [sp]

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

	// 'in' (r2) is not read again below, so t1 may overwrite it freely.

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	// XOR the input block with the first four key words.
	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	/*
	 * 8 iterations of 4 loads, 32 bytes apart, touch every 32-byte line
	 * of the table once. The loaded values are discarded; r8-r11 are
	 * free because the key words they held have already been consumed.
	 */
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr; t0 is clobbered by the rounds

	/*
	 * Rounds are emitted in pairs so that the state ping-pongs between
	 * r4-r7 and r8-r11 without register moves. Bit 1 of the round count
	 * picks the entry point: when it is set, the first pair at label 0
	 * is skipped. Tracing the loop for rounds = 10, 12 and 14 gives 9,
	 * 11 and 13 expansions of the round macro before label 2 is reached,
	 * always with the state left in r8-r11.
	 *
	 * The flags set by subs are only tested by bls after another round
	 * has been expanded in between; this relies on the round macros not
	 * modifying the condition flags.
	 */
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	/*
	 * No separate last-round table: offset the base by one byte so that
	 * the byte loads of the last round fetch byte 1 of each entry of the
	 * main table (presumably the plain S-box value; confirm against the
	 * table layout).
	 */
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	// (4 iterations of 2 loads, 32 bytes apart, cover 256 bytes).
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	// The round counter is finished, so its register takes the saved CPSR.
	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	// r3 was used as t2 by the rounds; fetch 'out' back from the stack.
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}		// restore registers and return

	// Literal pool for the 'ldr =' form that __adrl uses before ARMv7.
	.align		3
	.ltorg
	.endm

/*
 * __aes_arm_encrypt - process one 16-byte block with the forward rounds.
 *
 * Registers on entry follow the aliases at the top of this file:
 * r0 = rk, r1 = rounds, r2 = in, r3 = out.
 *
 * The body is do_crypt expanded with fround and crypto_ft_tab. The third
 * argument is left blank, so the last round uses byte loads from
 * crypto_ft_tab + 1 with an index shift of 2, i.e. one byte out of every
 * 4-byte table entry.
 */
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

/*
 * __aes_arm_decrypt - process one 16-byte block with the inverse rounds.
 *
 * Registers on entry follow the aliases at the top of this file:
 * r0 = rk, r1 = rounds, r2 = in, r3 = out.
 *
 * The body is do_crypt expanded with iround and crypto_it_tab. The last
 * round uses byte loads from crypto_aes_inv_sbox with an index shift of 0,
 * i.e. one byte per table entry. The entry point is aligned to 32 bytes.
 */
	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)