mirror of https://github.com/Qortal/Brooklyn
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *   Gareth Hughes <[email protected]>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H

#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/xcr.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>

/*
 * High level FPU state handling functions:
 */
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
extern void fpu__clear_user_states(struct fpu *fpu);
extern void fpu__clear_all(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);

/*
 * Boot time FPU initialization functions:
 */
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
extern u64 fpu__get_supported_xfeatures_mask(void);

/*
 * Debugging facility:
 */
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif

/*
 * FPU related CPU feature flag helper routines:
 */
static __always_inline __pure bool use_xsaveopt(void)
{
        return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
        return static_cpu_has(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
        return static_cpu_has(X86_FEATURE_FXSR);
}

/*
 * fpstate handling functions:
 */

extern union fpregs_state init_fpstate;

extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif

static inline void fpstate_init_xstate(struct xregs_state *xsave)
{
        /*
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
}

static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
}
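
/*
 * Note on the defaults set by fpstate_init_fxstate() above: 0x37f is the
 * x87 power-on control word (all FP exceptions masked, 64-bit precision,
 * round-to-nearest), and MXCSR_DEFAULT plays the same role for SSE, so a
 * freshly initialized fxregs_state raises no spurious FP exceptions.
 */
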
extern void fpstate_sanitize_xstate(struct fpu *fpu);

#define user_insn(insn, output, input...) \
({ \
        int err; \
        \
        might_fault(); \
        \
        asm volatile(ASM_STAC "\n" \
                "1:" #insn "\n\t" \
                "2: " ASM_CLAC "\n" \
                ".section .fixup,\"ax\"\n" \
                "3: movl $-1,%[err]\n" \
                "   jmp 2b\n" \
                ".previous\n" \
                _ASM_EXTABLE(1b, 3b) \
                : [err] "=r" (err), output \
                : "0"(0), input); \
        err; \
})

#define kernel_insn_err(insn, output, input...) \
({ \
        int err; \
        asm volatile("1:" #insn "\n\t" \
                "2:\n" \
                ".section .fixup,\"ax\"\n" \
                "3: movl $-1,%[err]\n" \
                "   jmp 2b\n" \
                ".previous\n" \
                _ASM_EXTABLE(1b, 3b) \
                : [err] "=r" (err), output \
                : "0"(0), input); \
        err; \
})

#define kernel_insn(insn, output, input...) \
        asm volatile("1:" #insn "\n\t" \
                "2:\n" \
                _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
                : output : input)
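
/*
 * All three wrappers execute a single FP save/restore instruction under an
 * exception-table fixup:
 *
 *   user_insn()       - for user-space buffers: brackets the instruction
 *                       with STAC/CLAC and evaluates to 0 on success or -1
 *                       if the access faults.
 *   kernel_insn_err() - same -1-on-fault convention for kernel buffers, so
 *                       no STAC/CLAC is needed.
 *   kernel_insn()     - returns nothing; a faulting restore is handed to
 *                       ex_handler_fprestore rather than reported to the
 *                       caller.
 */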

static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
        return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}

static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
        else
                return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
}

static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else
                kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else
                return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        else
                return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
        kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
{
        return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
        return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
        if (IS_ENABLED(CONFIG_X86_32))
                asm volatile("fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
        else
                asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
}

/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"

#define XSTATE_OP(op, st, lmask, hmask, err) \
        asm volatile("1:" op "\n\t" \
                "xor %[err], %[err]\n" \
                "2:\n\t" \
                ".pushsection .fixup,\"ax\"\n\t" \
                "3: movl $-2,%[err]\n\t" \
                "jmp 2b\n\t" \
                ".popsection\n\t" \
                _ASM_EXTABLE(1b, 3b) \
                : [err] "=r" (err) \
                : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
                : "memory")
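
/*
 * XSTATE_OP() binds its operands the way the XSAVE-family instructions
 * expect them: the area pointer in %rdi/%edi (the "D" constraint, matching
 * the hard-coded (%edi)/(%rdi) form of the opcode macros above) and the
 * requested-feature bitmap split across EAX ("a", low 32 bits) and EDX
 * ("d", high 32 bits).  On success, err is cleared by the xor; if the
 * instruction faults, the fixup sets err to -2, which is how callers such
 * as copy_xregs_to_user() below report failure.
 */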

/*
 * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
 * format and supervisor states in addition to the modified optimization in
 * XSAVEOPT.
 *
 * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
 * supports the modified optimization which is not supported by XSAVE.
 *
 * We use XSAVE as a fallback.
 *
 * The 661 label is defined in the ALTERNATIVE* macros as the address of the
 * original instruction which gets replaced. We need to use it here as the
 * address of the instruction at which we might get an exception.
 */
#define XSTATE_XSAVE(st, lmask, hmask, err) \
        asm volatile(ALTERNATIVE_2(XSAVE, \
                XSAVEOPT, X86_FEATURE_XSAVEOPT, \
                XSAVES, X86_FEATURE_XSAVES) \
                "\n" \
                "xor %[err], %[err]\n" \
                "3:\n" \
                ".pushsection .fixup,\"ax\"\n" \
                "4: movl $-2, %[err]\n" \
                "jmp 3b\n" \
                ".popsection\n" \
                _ASM_EXTABLE(661b, 4b) \
                : [err] "=r" (err) \
                : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
                : "memory")

/*
 * Use XRSTORS to restore context if it is enabled. XRSTORS supports the
 * compact XSAVE area format.
 */
#define XSTATE_XRESTORE(st, lmask, hmask) \
        asm volatile(ALTERNATIVE(XRSTOR, \
                XRSTORS, X86_FEATURE_XSAVES) \
                "\n" \
                "3:\n" \
                _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore) \
                : \
                : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
                : "memory")

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
        u64 mask = xfeatures_mask_all;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (boot_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XSAVE, xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
        u64 mask = -1;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON(system_state != SYSTEM_BOOTING);

        if (boot_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

        /*
         * We should never fault when copying from a kernel buffer, and the
         * FPU state we set at boot time should be valid.
         */
        WARN_ON_FPU(err);
}

/*
 * Save processor xstate to xsave area.
 */
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
        u64 mask = xfeatures_mask_all;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        WARN_ON_FPU(!alternatives_patched);

        XSTATE_XSAVE(xstate, lmask, hmask, err);

        /* We should never fault when copying to a kernel buffer: */
        WARN_ON_FPU(err);
}

/*
 * Restore processor xstate from xsave area.
 */
static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
        u32 lmask = mask;
        u32 hmask = mask >> 32;

        XSTATE_XRESTORE(xstate, lmask, hmask);
}

/*
 * Save xstate to user space xsave area.
 *
 * We don't use the modified optimization because xrstor/xrstors might track
 * a different application.
 *
 * We don't use the compacted xsave area format, for backward compatibility
 * with old applications which don't understand it.
 */
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
        u64 mask = xfeatures_mask_user();
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        /*
         * Clear the xsave header first, so that reserved fields are
         * initialized to zero.
         */
        err = __clear_user(&buf->header, sizeof(buf->header));
        if (unlikely(err))
                return -EFAULT;

        stac();
        XSTATE_OP(XSAVE, buf, lmask, hmask, err);
        clac();

        return err;
}

/*
 * Restore xstate from user space xsave area.
 */
static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
{
        struct xregs_state *xstate = ((__force struct xregs_state *)buf);
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        stac();
        XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
        clac();

        return err;
}

/*
 * Restore xstate from kernel space xsave area, return an error code instead
 * of an exception.
 */
static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
{
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;

        if (static_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

        return err;
}
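
/*
 * Note the two restore flavours above: copy_kernel_to_xregs() has no return
 * value and leaves a faulting restore to ex_handler_fprestore, while
 * copy_kernel_to_xregs_err() and the user-space variants report the fault
 * as a negative error code for the caller to handle.
 */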

extern int copy_fpregs_to_fpstate(struct fpu *fpu);

static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
        if (use_xsave()) {
                copy_kernel_to_xregs(&fpstate->xsave, mask);
        } else {
                if (use_fxsr())
                        copy_kernel_to_fxregs(&fpstate->fxsave);
                else
                        copy_kernel_to_fregs(&fpstate->fsave);
        }
}

static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
        /*
         * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
         * is pending. Clear the x87 state here by setting it to fixed values.
         * "m" is a random variable that should be in L1.
         */
        if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
                asm volatile(
                        "fnclex\n\t"
                        "emms\n\t"
                        "fildl %P[addr]" /* set F?P to defined value */
                        : : [addr] "m" (fpstate));
        }

        __copy_kernel_to_fpregs(fpstate, -1);
}

extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);

/*
 * FPU context switch related helper methods:
 */

DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * The in-register FPU state for an FPU context on a CPU is assumed to be
 * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
 * matches the FPU.
 *
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Either one of these invalidation functions is enough. Invalidate
 * a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */
static inline void __cpu_invalidate_fpregs_state(void)
{
        __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
        fpu->last_cpu = -1;
}

static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
        return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
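
/*
 * In other words, register state is considered live only while both tokens
 * agree: the per-CPU fpu_fpregs_owner_ctx pointer still names this struct
 * fpu, and fpu->last_cpu still names this CPU.  Writing NULL into the
 * per-CPU pointer or -1 into last_cpu breaks one of the two comparisons in
 * fpregs_state_valid(), which is why either invalidation helper on its own
 * is sufficient.
 */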

/*
 * These generally need preemption protection to work,
 * do try to avoid using these on their own:
 */
static inline void fpregs_deactivate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, NULL);
        trace_x86_fpu_regs_deactivated(fpu);
}

static inline void fpregs_activate(struct fpu *fpu)
{
        this_cpu_write(fpu_fpregs_owner_ctx, fpu);
        trace_x86_fpu_regs_activated(fpu);
}

/*
 * Internal helper, do not use directly. Use switch_fpu_return() instead.
 */
static inline void __fpregs_load_activate(void)
{
        struct fpu *fpu = &current->thread.fpu;
        int cpu = smp_processor_id();

        if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
                return;

        if (!fpregs_state_valid(fpu, cpu)) {
                copy_kernel_to_fpregs(&fpu->state);
                fpregs_activate(fpu);
                fpu->last_cpu = cpu;
        }
        clear_thread_flag(TIF_NEED_FPU_LOAD);
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state.
 *    This is done within the context of the old process.
 *
 *  - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
 *    will get loaded on return to userspace, or when the kernel needs it.
 *
 * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
 * are saved in the current thread's FPU register state.
 *
 * If TIF_NEED_FPU_LOAD is set then the CPU's FPU registers may not
 * hold current()'s FPU registers. The registers must be loaded before
 * returning to userland or before their content is used in any other way.
 *
 * The FPU context is only stored/restored for a user task and
 * PF_KTHREAD is used to distinguish between kernel and user threads.
 */
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
        if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
                        old_fpu->last_cpu = cpu;

                /* But leave fpu_fpregs_owner_ctx! */
                trace_x86_fpu_regs_deactivated(old_fpu);
        }
}
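
/*
 * Illustrative only: on a context switch the scheduler glue is expected to
 * call the two halves roughly like this (prev/next being the outgoing and
 * incoming tasks; the exact call sites live in the process switching code,
 * not in this header):
 *
 *      switch_fpu_prepare(&prev->thread.fpu, cpu);
 *      ... switch stacks and the rest of the thread state ...
 *      switch_fpu_finish(&next->thread.fpu);
 *
 * prepare() snapshots the outgoing task's registers into memory, finish()
 * only sets TIF_NEED_FPU_LOAD (plus the eager PKRU switch below), and the
 * actual register load is deferred to __fpregs_load_activate().
 */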

/*
 * Misc helper functions:
 */

/*
 * Load PKRU from the FPU context if available. Delay loading of the
 * complete FPU state until the return to userland.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu)
{
        u32 pkru_val = init_pkru_value;
        struct pkru_state *pk;

        if (!static_cpu_has(X86_FEATURE_FPU))
                return;

        set_thread_flag(TIF_NEED_FPU_LOAD);

        if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
                return;

        /*
         * PKRU state is switched eagerly because it needs to be valid before
         * we return to userland, e.g. for a copy_to_user() operation.
         */
        if (current->mm) {
                pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
                if (pk)
                        pkru_val = pk->pkru;
        }
        __write_pkru(pkru_val);

        /*
         * The expensive PASID MSR write will be avoided in update_pasid()
         * because TIF_NEED_FPU_LOAD was set. And the PASID state won't be
         * updated unless it's different from mm->pasid, to reduce overhead.
         */
        update_pasid();
}

#endif /* _ASM_X86_FPU_INTERNAL_H */