mirror of https://github.com/Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
351 lines
8.6 KiB
351 lines
8.6 KiB
// SPDX-License-Identifier: GPL-2.0 |
|
/* |
|
* Routines to emulate some Altivec/VMX instructions, specifically |
|
* those that can trap when given denormalized operands in Java mode. |
|
*/ |
|
#include <linux/kernel.h> |
|
#include <linux/errno.h> |
|
#include <linux/sched.h> |
|
#include <asm/ptrace.h> |
|
#include <asm/processor.h> |
|
#include <asm/switch_to.h> |
|
#include <linux/uaccess.h> |
|
#include <asm/inst.h> |
|
|
|
/* Functions in vector.S */ |
|
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); |
|
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); |
|
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); |
|
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); |
|
extern void vrefp(vector128 *dst, vector128 *src); |
|
extern void vrsqrtefp(vector128 *dst, vector128 *src); |
|
extern void vexptep(vector128 *dst, vector128 *src); |
|
|
|
/*
 * Mantissa lookup table for eexp2(): entry k is indexed by the top three
 * fraction bits of the exponent argument and holds a 24-bit mantissa with
 * the implicit leading one at bit 23 (0x800000 == 1.0).  The table is
 * never written, so it is const.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN            -> the same NaN with the quiet bit (0x400000) set
 *   |x| >= 256     -> +Inf for positive x, +0 for negative x
 *   |x| <  2^-23   -> 1.0
 *
 * The multiply-high steps are written in plain C (64-bit product, upper
 * 32 bits) rather than PowerPC inline assembly; the result is identical.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;	/* exp <= 7, so this stays below 2^31 */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * The bias is 126 rather than 127 because the mantissa added in
	 * later still carries its leading one in bit 23.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;	/* overflow -> +Inf */
	if (exp < -23)
		return 0;		/* underflow -> +0 */

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction.  The
	 * shift is done on an unsigned value so that a negative pwr does
	 * not trigger a signed left shift.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffu) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift the mantissa down, rounding to nearest */
	exp = -exp;
	mant += 1u << (exp - 1);
	return mant >> exp;
}
|
|
|
/* High 32 bits of the unsigned 32x32 -> 64 bit product of a and b. */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN       -> the same NaN with the quiet bit (0x400000) set
 *   +Inf      -> +Inf
 *   +0, -0    -> -Inf
 *   x < 0     -> default QNaN (0x7fc00000), including -Inf; the
 *                logarithm of a negative number is an invalid operation
 *
 * The multiply-high and count-leading-zeros steps are written in plain C
 * (with the compiler's clz builtin) rather than PowerPC inline assembly.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* +Inf stays +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* negative, non-zero */
		return 0x7fc00000;	/* return QNaN */

	if (exp == 0) {
		/* denormalized; mant != 0 here, so clz is well defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;	/* bring the leading one up to bit 23 */
		/* multiply instead of shifting a negative value left */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * From here on exp is the result as a signed fixed-point number
	 * scaled by 2^23.  Peel off factors of 2^0.5, 2^0.25 and 2^0.125
	 * from the mantissa, crediting each one to the fraction of exp.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334u);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccbu);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8u);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3au);
		exp += frac;
	}

	/* convert the fixed-point value in exp back to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		lz = __builtin_clz((unsigned int)exp);
		lz = 8 - lz;		/* distance of leading one from bit 23 */
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* exp still has its leading one in bit 23, hence 126 not 127 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
|
|
|
#define VSCR_SAT	1

/*
 * Convert the single-precision value with bit pattern `x', multiplied by
 * 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Results that do not fit are clamped to the most
 * negative / most positive 32-bit value and VSCR_SAT is ORed into
 * *vscrp, except when the scaled value is exactly -2^31.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int e = (x >> 23) & 0xff;
	int m = x & 0x7fffff;

	if (e == 255 && m != 0)
		return 0;		/* NaN -> 0 */

	e = e - 127 + scale;
	if (e < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (e >= 31) {
		/* out of range; only an exact -2^31 escapes the SAT flag */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading one at bit 30, then shift down to the units place */
	m = ((m | 0x800000) << 7) >> (30 - e);
	return negative ? -m : m;
}
|
|
|
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) |
|
{ |
|
int exp; |
|
unsigned int mant; |
|
|
|
exp = (x >> 23) & 0xff; |
|
mant = x & 0x7fffff; |
|
if (exp == 255 && mant != 0) |
|
return 0; /* NaN -> 0 */ |
|
exp = exp - 127 + scale; |
|
if (exp < 0) |
|
return 0; /* round towards zero */ |
|
if (x & 0x80000000) { |
|
/* negative => saturate to 0 */ |
|
*vscrp |= VSCR_SAT; |
|
return 0; |
|
} |
|
if (exp >= 32) { |
|
/* saturate */ |
|
*vscrp |= VSCR_SAT; |
|
return 0xffffffff; |
|
} |
|
mant |= 0x800000; |
|
mant = (mant << 8) >> (31 - exp); |
|
return mant; |
|
} |
|
|
|
/*
 * Round to floating integer, towards 0 (truncate).
 *
 * Takes and returns single-precision bit patterns.  A NaN comes back
 * with its quiet bit set; Inf and values that are already integral
 * come back unchanged.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */

	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */

	if (e < 0)
		return x & 0x80000000;	/* |x| < 1.0 -> signed zero */

	/* clear the mantissa bits that sit below the binary point */
	return x & ~(0x7fffffu >> e);
}
|
|
|
/*
 * Round to floating integer, away from zero (towards +/- Inf).
 *
 * Takes and returns single-precision bit patterns.  A NaN comes back
 * with its quiet bit set; zeros, Inf and values that are already
 * integral come back unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int fraction_bits;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */

	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */

	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */

	if (e < 0)
		return sign | 0x3f800000;	/* 0 < |x| < 1.0 -> +/- 1.0 */

	fraction_bits = 0x7fffffu >> e;
	/*
	 * Bump past the next integer unless already on one, then drop the
	 * fraction.  A carry out of the mantissa just increments the
	 * exponent field; it cannot reach the sign bit.
	 */
	return (x + fraction_bits) & ~fraction_bits;
}
|
|
|
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (IEEE round-to-nearest): 0.5 -> 0, 1.5 -> 2, 2.5 -> 2.
 *
 * Takes and returns single-precision bit patterns.  A NaN comes back
 * with its quiet bit set; Inf and values that are already integral
 * come back unchanged.
 */
static unsigned int rfin(unsigned int x)
{
	unsigned int frac_mask, unit, half, frac;
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie; its even neighbour is 0 */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffffu >> exp;	/* bits below the binary point */
	unit = frac_mask + 1;		/* weight of 1.0 in the bit pattern */
	half = unit >> 1;		/* weight of 0.5 */
	frac = x & frac_mask;
	x &= ~frac_mask;		/* truncate towards zero */

	/*
	 * Step away from zero when the fraction exceeds one half, or when
	 * it is exactly one half and the truncated integer is odd.  For
	 * exp == 0 the "unit" bit is the low exponent bit, which is set
	 * for values in [1.0, 2.0), matching the odd integer part 1.
	 * A carry out of the mantissa just increments the exponent field.
	 */
	if (frac > half || (frac == half && (x & unit)))
		x += unit;
	return x;
}
|
|
|
int emulate_altivec(struct pt_regs *regs) |
|
{ |
|
struct ppc_inst instr; |
|
unsigned int i, word; |
|
unsigned int va, vb, vc, vd; |
|
vector128 *vrs; |
|
|
|
if (get_user_instr(instr, (void __user *)regs->nip)) |
|
return -EFAULT; |
|
|
|
word = ppc_inst_val(instr); |
|
if (ppc_inst_primary_opcode(instr) != 4) |
|
return -EINVAL; /* not an altivec instruction */ |
|
vd = (word >> 21) & 0x1f; |
|
va = (word >> 16) & 0x1f; |
|
vb = (word >> 11) & 0x1f; |
|
vc = (word >> 6) & 0x1f; |
|
|
|
vrs = current->thread.vr_state.vr; |
|
switch (word & 0x3f) { |
|
case 10: |
|
switch (vc) { |
|
case 0: /* vaddfp */ |
|
vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); |
|
break; |
|
case 1: /* vsubfp */ |
|
vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); |
|
break; |
|
case 4: /* vrefp */ |
|
vrefp(&vrs[vd], &vrs[vb]); |
|
break; |
|
case 5: /* vrsqrtefp */ |
|
vrsqrtefp(&vrs[vd], &vrs[vb]); |
|
break; |
|
case 6: /* vexptefp */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = eexp2(vrs[vb].u[i]); |
|
break; |
|
case 7: /* vlogefp */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = elog2(vrs[vb].u[i]); |
|
break; |
|
case 8: /* vrfin */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = rfin(vrs[vb].u[i]); |
|
break; |
|
case 9: /* vrfiz */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = rfiz(vrs[vb].u[i]); |
|
break; |
|
case 10: /* vrfip */ |
|
for (i = 0; i < 4; ++i) { |
|
u32 x = vrs[vb].u[i]; |
|
x = (x & 0x80000000)? rfiz(x): rfii(x); |
|
vrs[vd].u[i] = x; |
|
} |
|
break; |
|
case 11: /* vrfim */ |
|
for (i = 0; i < 4; ++i) { |
|
u32 x = vrs[vb].u[i]; |
|
x = (x & 0x80000000)? rfii(x): rfiz(x); |
|
vrs[vd].u[i] = x; |
|
} |
|
break; |
|
case 14: /* vctuxs */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, |
|
¤t->thread.vr_state.vscr.u[3]); |
|
break; |
|
case 15: /* vctsxs */ |
|
for (i = 0; i < 4; ++i) |
|
vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, |
|
¤t->thread.vr_state.vscr.u[3]); |
|
break; |
|
default: |
|
return -EINVAL; |
|
} |
|
break; |
|
case 46: /* vmaddfp */ |
|
vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
|
break; |
|
case 47: /* vnmsubfp */ |
|
vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
|
break; |
|
default: |
|
return -EINVAL; |
|
} |
|
|
|
return 0; |
|
}
|
|
|