forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
335 lines
6.7 KiB
335 lines
6.7 KiB
/* SPDX-License-Identifier: GPL-2.0 */ |
|
#include <asm/processor.h> |
|
#include <asm/ppc_asm.h> |
|
#include <asm/reg.h> |
|
#include <asm/asm-offsets.h> |
|
#include <asm/cputable.h> |
|
#include <asm/thread_info.h> |
|
#include <asm/page.h> |
|
#include <asm/ptrace.h> |
|
#include <asm/export.h> |
|
#include <asm/asm-compat.h> |
|
|
|
/*
 * load_vr_state - load VMX state from memory, including VSCR.
 *
 * In:    r3 = base address of the saved vector state; the VRSTATE_*
 *             offsets are applied relative to it.
 * Uses:  r4 (offset scratch), the vector registers and VSCR.
 *
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3		/* saved VSCR image travels through v0 */
	mtvscr	v0
	/*
	 * VSCR is installed before the bulk restore because v0 was used as
	 * the staging register; REST_32VRS(0, ...) is expected to reload v0
	 * with its saved contents afterwards.
	 */
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state);	/* used by restore_math */
|
|
|
/*
 * store_vr_state - store VMX state into memory, including VSCR.
 *
 * In:    r3 = base address of the save area; the VRSTATE_* offsets are
 *             applied relative to it.
 * Uses:  r4 (offset scratch), v0 (overwritten with VSCR).
 *
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	/*
	 * Save the vector registers first: v0 is reused below as the
	 * staging register for VSCR, so its live value must already be
	 * in memory by then.
	 */
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
|
|
|
/*
 * load_up_altivec - give the current task use of the VMX unit.
 *
 * Enables VMX in the running MSR, makes sure VRSAVE is non-zero, sets
 * MSR_VEC in the MSR image that will be used on return, marks the thread
 * as having loaded/used vector state and reloads VSCR and the vector
 * registers from the thread's saved vector state.
 *
 * No other task's registers are saved here: on SMP we know the VMX is
 * free, since we give it up every switch (ie, no lazy save of the
 * vector registers).
 *
 * Register expectations visible in the code:
 *   32-bit: r2 is used as the base for THREAD and r9 carries the MSR image
 *           that gets MSR_VEC or'ed in (presumably current and the return
 *           MSR respectively - confirm against the exception entry code).
 *   64-bit: r13 is the PACA, r12 carries the MSR image, which is also
 *           written back to _MSR(r1).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f			/* already non-zero: leave it alone */
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * The saved MSR image was just modified, so clear the PACA flag;
	 * presumably this tells the return path that SRR0/SRR1 no longer
	 * match the frame - confirm against the interrupt return code.
	 */
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	/* r5 = thread_struct of the current task from here on */
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE	/* r6 = saved vector state */
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6		/* VSCR goes through v0 ... */
	mtvscr	v0
	REST_32VRS(0,r4,r6)		/* ... before the vector registers are reloaded */
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)
|
|
|
/*
 * save_altivec(tsk)
 * Save the vector registers and VSCR of a task.
 *
 * In:    r3 = tsk
 * Uses:  r3 (becomes tsk + THREAD), r4, r5, r7, v0
 *
 * The destination is the pointer stored at THREAD_VRSAVEAREA when that
 * pointer is non-zero; otherwise the THREAD_VRSTATE area embedded in the
 * thread_struct is used.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	/* NOTE(review): r5 is not read anywhere in this routine. */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f			/* explicit save area supplied */
	addi	r7,r3,THREAD_VRSTATE	/* fall back to the in-thread area */
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0			/* v0 is already saved; reuse it for VSCR */
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
|
|
|
#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 *
 * Make VSX usable by the current task on return from the interrupt.
 * VSX reuses the FP and VMX register loads, so each of those is
 * performed first unless the corresponding bit is already set in the
 * MSR image held in r12.
 *
 * In:    r12 = MSR image to return with, r13 = PACA, r1 = frame whose
 *              _MSR slot is updated.
 * Exit:  does not return with blr; it branches to
 *        fast_interrupt_return_srr. LR is overwritten by the conditional
 *        calls below.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	/* the saved MSR image changed, so clear the PACA SRR-valid flag */
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */
|
|
|
|
|
/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 *
 * Floating-point constants 0.0, 1.0 and 0.5 and the LDCONST(fr, name)
 * helper that loads one of them into FP register 'fr':
 *   - 32-bit: single-precision words in .data, addressed with lis/lfs;
 *             LDCONST clobbers r11.
 *   - 64-bit: double-precision TOC entries, loaded relative to r2.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif
|
|
|
.text |
|
/* |
|
* Internal routine to enable floating point and set FPSCR to 0. |
|
* Don't call it from C; it doesn't use the normal calling convention. |
|
*/ |
|
fpenable: |
|
#ifdef CONFIG_PPC32 |
|
stwu r1,-64(r1) |
|
#else |
|
stdu r1,-64(r1) |
|
#endif |
|
mfmsr r10 |
|
ori r11,r10,MSR_FP |
|
mtmsr r11 |
|
isync |
|
stfd fr0,24(r1) |
|
stfd fr1,16(r1) |
|
stfd fr31,8(r1) |
|
LDCONST(fr1, fpzero) |
|
mffs fr31 |
|
MTFSF_L(fr1) |
|
blr |
|
|
|
/*
 * Counterpart of fpenable; reached with a plain branch ("b fpdisable")
 * from the vector helpers in this file, never called from C.
 *
 * Expects:
 *   r12  = return address saved by the helper before it called fpenable
 *   r10  = MSR value to put back
 *   fr31 = FPSCR value to put back
 *   r1   = the 64-byte frame set up by fpenable
 *
 * Restores FPSCR, fr31/fr1/fr0 and the MSR, pops the frame and returns
 * to the address that was in r12.
 */
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
|
|
|
/*
 * Vector add, floating point.
 *
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * Processes four consecutive single-precision elements:
 *	dst[i] = a[i] + b[i]
 * r12 carries the return address across fpenable/fpdisable.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0			/* four elements */
	li	r6,0			/* r6 = byte offset of current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
|
|
|
/*
 * Vector subtract, floating point.
 *
 * r3 -> destination, r4 -> minuend, r5 -> subtrahend.
 * Processes four consecutive single-precision elements:
 *	dst[i] = a[i] - b[i]
 * r12 carries the return address across fpenable/fpdisable.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0			/* four elements */
	li	r6,0			/* r6 = byte offset of current element */
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
|
|
|
/*
 * Vector multiply and add, floating point.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * Processes four consecutive single-precision elements:
 *	dst[i] = a[i] * c[i] + b[i]
 * fr2 is needed in addition to what fpenable saves, so it is preserved
 * in the 32(r1) slot of the frame created by fpenable.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0			/* four elements */
	li	r7,0			/* r7 = byte offset of current element */
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1		/* a * c + b */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable
|
|
|
/*
 * Vector negative multiply and subtract, floating point.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * Processes four consecutive single-precision elements:
 *	dst[i] = -(a[i] * c[i] - b[i])
 * fr2 is needed in addition to what fpenable saves, so it is preserved
 * in the 32(r1) slot of the frame created by fpenable.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0			/* four elements */
	li	r7,0			/* r7 = byte offset of current element */
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1		/* -(a * c - b) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable
|
|
|
/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 *
 * Processes four consecutive single-precision elements:
 *	dst[i] = 1.0 / src[i]
 * fr1 holds the constant 1.0 for the whole loop.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0			/* four elements */
	li	r6,0			/* r6 = byte offset of current element */
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0		/* 1.0 / x */
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
|
|
|
/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 *
 * Processes four consecutive single-precision elements.  With s the
 * source element and r the running estimate, each refinement step is
 *	r = r + 0.5 * r * (1 - s * r * r)
 * fr4 = 1.0 and fr5 = 0.5 for the whole loop.  fr2-fr5 are preserved in
 * the 32..56(r1) slots of the frame created by fpenable.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0			/* four elements */
	li	r6,0			/* r6 = byte offset of current element */
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	/* first Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
|
|
|