mirror of https://github.com/Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
377 lines
9.0 KiB
377 lines
9.0 KiB
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <[email protected]>
 */
|
|
|
#include <asm/crypto/glue_helper.h> |
|
#include <crypto/algapi.h> |
|
#include <crypto/cast5.h> |
|
#include <crypto/internal/simd.h> |
|
#include <linux/crypto.h> |
|
#include <linux/err.h> |
|
#include <linux/module.h> |
|
#include <linux/types.h> |
|
|
|
#define CAST5_PARALLEL_BLOCKS 16 |
|
|
|
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, |
|
const u8 *src); |
|
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, |
|
const u8 *src); |
|
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, |
|
const u8 *src); |
|
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, |
|
__be64 *iv); |
|
|
|
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, |
|
unsigned int keylen) |
|
{ |
|
return cast5_setkey(&tfm->base, key, keylen); |
|
} |
|
|
|
/*
 * Thin wrapper around glue_fpu_begin() that fills in the CAST5 block size
 * and the parallel block count.
 *
 * @fpu_enabled: FPU state as returned by the previous call (false initially)
 * @walk:        walk currently being processed
 * @nbytes:      bytes available in the current walk step
 *
 * Returns the value from glue_fpu_begin(); callers store it as the new
 * fpu_enabled state and later pass it to cast5_fpu_end().
 * NOTE(review): presumably the FPU is only claimed once @nbytes is large
 * enough for a parallel batch - confirm in asm/crypto/glue_helper.h.
 */
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	const bool now_enabled = glue_fpu_begin(CAST5_BLOCK_SIZE,
						CAST5_PARALLEL_BLOCKS,
						walk, fpu_enabled, nbytes);

	return now_enabled;
}
|
|
|
/*
 * Counterpart of cast5_fpu_begin(): forwards the accumulated @fpu_enabled
 * state to glue_fpu_end().
 *
 * This function returns void, so glue_fpu_end() is invoked as a plain
 * statement; "return <expression>;" is not permitted by ISO C in a
 * function whose return type is void.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
|
|
|
/*
 * ECB en-/decryption of a complete skcipher request.
 *
 * @req: request to process
 * @enc: true to encrypt, false to decrypt
 *
 * For every step of the walk, full batches of CAST5_PARALLEL_BLOCKS blocks
 * go through the 16-way assembler routine; whatever whole blocks remain are
 * handled one at a time by __cast5_encrypt() / __cast5_decrypt().  The
 * number of unprocessed bytes is reported to skcipher_walk_done().
 *
 * Returns the last status produced by skcipher_walk_virt() or
 * skcipher_walk_done().
 */
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	const unsigned int batch = bsize * CAST5_PARALLEL_BLOCKS;
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	for (nbytes = walk.nbytes; nbytes; nbytes = walk.nbytes) {
		void (*fn)(struct cast5_ctx *, u8 *, const u8 *);
		u8 *in = walk.src.virt.addr;
		u8 *out = walk.dst.virt.addr;
		/* Cleared only when the batch loop leaves less than one block. */
		bool single_blocks = true;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Full batches through the assembler. */
		if (nbytes >= batch) {
			if (enc)
				fn = cast5_ecb_enc_16way;
			else
				fn = cast5_ecb_dec_16way;

			do {
				fn(ctx, out, in);

				in += batch;
				out += batch;
				nbytes -= batch;
			} while (nbytes >= batch);

			single_blocks = nbytes >= bsize;
		}

		/*
		 * Remaining whole blocks, one at a time.  When no batch ran,
		 * this loop executes at least once without checking nbytes
		 * first - the walk is presumably relied on to supply at least
		 * one full block per step (TODO confirm).
		 */
		if (single_blocks) {
			if (enc)
				fn = __cast5_encrypt;
			else
				fn = __cast5_decrypt;

			do {
				fn(ctx, out, in);

				in += bsize;
				out += bsize;
				nbytes -= bsize;
			} while (nbytes >= bsize);
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
|
|
|
static int ecb_encrypt(struct skcipher_request *req) |
|
{ |
|
return ecb_crypt(req, true); |
|
} |
|
|
|
static int ecb_decrypt(struct skcipher_request *req) |
|
{ |
|
return ecb_crypt(req, false); |
|
} |
|
|
|
static int cbc_encrypt(struct skcipher_request *req) |
|
{ |
|
const unsigned int bsize = CAST5_BLOCK_SIZE; |
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
|
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); |
|
struct skcipher_walk walk; |
|
unsigned int nbytes; |
|
int err; |
|
|
|
err = skcipher_walk_virt(&walk, req, false); |
|
|
|
while ((nbytes = walk.nbytes)) { |
|
u64 *src = (u64 *)walk.src.virt.addr; |
|
u64 *dst = (u64 *)walk.dst.virt.addr; |
|
u64 *iv = (u64 *)walk.iv; |
|
|
|
do { |
|
*dst = *src ^ *iv; |
|
__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); |
|
iv = dst; |
|
src++; |
|
dst++; |
|
nbytes -= bsize; |
|
} while (nbytes >= bsize); |
|
|
|
*(u64 *)walk.iv = *iv; |
|
err = skcipher_walk_done(&walk, nbytes); |
|
} |
|
|
|
return err; |
|
} |
|
|
|
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, |
|
struct skcipher_walk *walk) |
|
{ |
|
const unsigned int bsize = CAST5_BLOCK_SIZE; |
|
unsigned int nbytes = walk->nbytes; |
|
u64 *src = (u64 *)walk->src.virt.addr; |
|
u64 *dst = (u64 *)walk->dst.virt.addr; |
|
u64 last_iv; |
|
|
|
/* Start of the last block. */ |
|
src += nbytes / bsize - 1; |
|
dst += nbytes / bsize - 1; |
|
|
|
last_iv = *src; |
|
|
|
/* Process multi-block batch */ |
|
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { |
|
do { |
|
nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); |
|
src -= CAST5_PARALLEL_BLOCKS - 1; |
|
dst -= CAST5_PARALLEL_BLOCKS - 1; |
|
|
|
cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src); |
|
|
|
nbytes -= bsize; |
|
if (nbytes < bsize) |
|
goto done; |
|
|
|
*dst ^= *(src - 1); |
|
src -= 1; |
|
dst -= 1; |
|
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); |
|
} |
|
|
|
/* Handle leftovers */ |
|
for (;;) { |
|
__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); |
|
|
|
nbytes -= bsize; |
|
if (nbytes < bsize) |
|
break; |
|
|
|
*dst ^= *(src - 1); |
|
src -= 1; |
|
dst -= 1; |
|
} |
|
|
|
done: |
|
*dst ^= *(u64 *)walk->iv; |
|
*(u64 *)walk->iv = last_iv; |
|
|
|
return nbytes; |
|
} |
|
|
|
static int cbc_decrypt(struct skcipher_request *req) |
|
{ |
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
|
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); |
|
bool fpu_enabled = false; |
|
struct skcipher_walk walk; |
|
unsigned int nbytes; |
|
int err; |
|
|
|
err = skcipher_walk_virt(&walk, req, false); |
|
|
|
while ((nbytes = walk.nbytes)) { |
|
fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); |
|
nbytes = __cbc_decrypt(ctx, &walk); |
|
err = skcipher_walk_done(&walk, nbytes); |
|
} |
|
|
|
cast5_fpu_end(fpu_enabled); |
|
return err; |
|
} |
|
|
|
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) |
|
{ |
|
u8 *ctrblk = walk->iv; |
|
u8 keystream[CAST5_BLOCK_SIZE]; |
|
u8 *src = walk->src.virt.addr; |
|
u8 *dst = walk->dst.virt.addr; |
|
unsigned int nbytes = walk->nbytes; |
|
|
|
__cast5_encrypt(ctx, keystream, ctrblk); |
|
crypto_xor_cpy(dst, keystream, src, nbytes); |
|
|
|
crypto_inc(ctrblk, CAST5_BLOCK_SIZE); |
|
} |
|
|
|
static unsigned int __ctr_crypt(struct skcipher_walk *walk, |
|
struct cast5_ctx *ctx) |
|
{ |
|
const unsigned int bsize = CAST5_BLOCK_SIZE; |
|
unsigned int nbytes = walk->nbytes; |
|
u64 *src = (u64 *)walk->src.virt.addr; |
|
u64 *dst = (u64 *)walk->dst.virt.addr; |
|
|
|
/* Process multi-block batch */ |
|
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { |
|
do { |
|
cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src, |
|
(__be64 *)walk->iv); |
|
|
|
src += CAST5_PARALLEL_BLOCKS; |
|
dst += CAST5_PARALLEL_BLOCKS; |
|
nbytes -= bsize * CAST5_PARALLEL_BLOCKS; |
|
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); |
|
|
|
if (nbytes < bsize) |
|
goto done; |
|
} |
|
|
|
/* Handle leftovers */ |
|
do { |
|
u64 ctrblk; |
|
|
|
if (dst != src) |
|
*dst = *src; |
|
|
|
ctrblk = *(u64 *)walk->iv; |
|
be64_add_cpu((__be64 *)walk->iv, 1); |
|
|
|
__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
|
*dst ^= ctrblk; |
|
|
|
src += 1; |
|
dst += 1; |
|
nbytes -= bsize; |
|
} while (nbytes >= bsize); |
|
|
|
done: |
|
return nbytes; |
|
} |
|
|
|
static int ctr_crypt(struct skcipher_request *req) |
|
{ |
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
|
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); |
|
bool fpu_enabled = false; |
|
struct skcipher_walk walk; |
|
unsigned int nbytes; |
|
int err; |
|
|
|
err = skcipher_walk_virt(&walk, req, false); |
|
|
|
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { |
|
fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); |
|
nbytes = __ctr_crypt(&walk, ctx); |
|
err = skcipher_walk_done(&walk, nbytes); |
|
} |
|
|
|
cast5_fpu_end(fpu_enabled); |
|
|
|
if (walk.nbytes) { |
|
ctr_crypt_final(&walk, ctx); |
|
err = skcipher_walk_done(&walk, 0); |
|
} |
|
|
|
return err; |
|
} |
|
|
|
static struct skcipher_alg cast5_algs[] = { |
|
{ |
|
.base.cra_name = "__ecb(cast5)", |
|
.base.cra_driver_name = "__ecb-cast5-avx", |
|
.base.cra_priority = 200, |
|
.base.cra_flags = CRYPTO_ALG_INTERNAL, |
|
.base.cra_blocksize = CAST5_BLOCK_SIZE, |
|
.base.cra_ctxsize = sizeof(struct cast5_ctx), |
|
.base.cra_module = THIS_MODULE, |
|
.min_keysize = CAST5_MIN_KEY_SIZE, |
|
.max_keysize = CAST5_MAX_KEY_SIZE, |
|
.setkey = cast5_setkey_skcipher, |
|
.encrypt = ecb_encrypt, |
|
.decrypt = ecb_decrypt, |
|
}, { |
|
.base.cra_name = "__cbc(cast5)", |
|
.base.cra_driver_name = "__cbc-cast5-avx", |
|
.base.cra_priority = 200, |
|
.base.cra_flags = CRYPTO_ALG_INTERNAL, |
|
.base.cra_blocksize = CAST5_BLOCK_SIZE, |
|
.base.cra_ctxsize = sizeof(struct cast5_ctx), |
|
.base.cra_module = THIS_MODULE, |
|
.min_keysize = CAST5_MIN_KEY_SIZE, |
|
.max_keysize = CAST5_MAX_KEY_SIZE, |
|
.ivsize = CAST5_BLOCK_SIZE, |
|
.setkey = cast5_setkey_skcipher, |
|
.encrypt = cbc_encrypt, |
|
.decrypt = cbc_decrypt, |
|
}, { |
|
.base.cra_name = "__ctr(cast5)", |
|
.base.cra_driver_name = "__ctr-cast5-avx", |
|
.base.cra_priority = 200, |
|
.base.cra_flags = CRYPTO_ALG_INTERNAL, |
|
.base.cra_blocksize = 1, |
|
.base.cra_ctxsize = sizeof(struct cast5_ctx), |
|
.base.cra_module = THIS_MODULE, |
|
.min_keysize = CAST5_MIN_KEY_SIZE, |
|
.max_keysize = CAST5_MAX_KEY_SIZE, |
|
.ivsize = CAST5_BLOCK_SIZE, |
|
.chunksize = CAST5_BLOCK_SIZE, |
|
.setkey = cast5_setkey_skcipher, |
|
.encrypt = ctr_crypt, |
|
.decrypt = ctr_crypt, |
|
} |
|
}; |
|
|
|
static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)]; |
|
|
|
/*
 * Module init: register the algorithms in cast5_algs, provided
 * cpu_has_xfeatures() reports both the SSE and the YMM xfeature.
 *
 * Returns -ENODEV (after logging the feature name reported by
 * cpu_has_xfeatures()) when that check fails, otherwise the result of
 * simd_register_skciphers_compat().
 */
static int __init cast5_init(void)
{
	const char *feature_name;

	if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			      &feature_name))
		return simd_register_skciphers_compat(cast5_algs,
						      ARRAY_SIZE(cast5_algs),
						      cast5_simd_algs);

	pr_info("CPU feature '%s' is not supported.\n", feature_name);
	return -ENODEV;
}
|
|
|
/* Module exit: unregister everything cast5_init() registered. */
static void __exit cast5_exit(void)
{
	const int nr_algs = ARRAY_SIZE(cast5_algs);

	simd_unregister_skciphers(cast5_algs, nr_algs, cast5_simd_algs);
}
|
|
|
module_init(cast5_init); |
|
module_exit(cast5_exit); |
|
|
|
MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); |
|
MODULE_LICENSE("GPL"); |
|
MODULE_ALIAS_CRYPTO("cast5");
|
|
|