// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 * Copyright (C) Huawei Inc., 2014
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <asm/kprobes.h>
#include <asm/cacheflush.h>
/* for arm_gen_branch */
#include <asm/insn.h>
/* for patch_text */
#include <asm/patch.h>

#include "core.h"

/*
 * See register_usage_flags. If the probed instruction doesn't use PC,
 * we can copy it into the template and have it executed directly
 * without simulation or emulation.
 */
#define ARM_REG_PC	15
#define can_kprobe_direct_exec(m)	(!test_bit(ARM_REG_PC, &(m)))
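
/*
 * Illustration (hypothetical instructions, not from this file): for
 * 'add r1, r2, r3' bit 15 of register_usage_flags stays clear, so
 * can_kprobe_direct_exec() is true and the instruction may run
 * unmodified inside the template; a PC-relative 'ldr r0, [pc, #8]'
 * sets bit 15 and must be simulated instead.
 */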

/*
 * NOTE: the first sub and add instruction will be modified according
 * to the stack cost of the instruction.
 */
asm (
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
			".global optprobe_template_sub_sp\n"
			"optprobe_template_sub_sp:"
			"	sub	sp, sp, #0xff\n"
			"	stmia	sp, {r0 - r14}\n"
			".global optprobe_template_add_sp\n"
			"optprobe_template_add_sp:"
			"	add	r3, sp, #0xff\n"
			"	str	r3, [sp, #52]\n"
			"	mrs	r4, cpsr\n"
			"	str	r4, [sp, #64]\n"
			"	mov	r1, sp\n"
			"	ldr	r0, 1f\n"
			"	ldr	r2, 2f\n"
			/*
			 * AEABI requires an 8-byte-aligned stack. If
			 * SP % 8 != 0 (SP % 4 == 0 should be ensured),
			 * allocate more bytes here.
			 */
			"	and	r4, sp, #4\n"
			"	sub	sp, sp, r4\n"
#if __LINUX_ARM_ARCH__ >= 5
			"	blx	r2\n"
#else
			"	mov	lr, pc\n"
			"	mov	pc, r2\n"
#endif
			"	add	sp, sp, r4\n"
			"	ldr	r1, [sp, #64]\n"
			"	tst	r1, #"__stringify(PSR_T_BIT)"\n"
			"	ldrne	r2, [sp, #60]\n"
			"	orrne	r2, #1\n"
			"	strne	r2, [sp, #60]	@ set bit0 of PC for thumb\n"
			"	msr	cpsr_cxsf, r1\n"
			".global optprobe_template_restore_begin\n"
			"optprobe_template_restore_begin:\n"
			"	ldmia	sp, {r0 - r15}\n"
			".global optprobe_template_restore_orig_insn\n"
			"optprobe_template_restore_orig_insn:\n"
			"	nop\n"
			".global optprobe_template_restore_end\n"
			"optprobe_template_restore_end:\n"
			"	nop\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			"1:	.long 0\n"
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			"2:	.long 0\n"
			".global optprobe_template_end\n"
			"optprobe_template_end:\n");
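
/*
 * Offset sketch (assuming the 18-word struct pt_regs layout of 32-bit
 * ARM, where uregs[13..16] are sp, lr, pc, cpsr): after the stmia
 * above, sp points at a pt_regs frame, so the template's magic
 * numbers line up as
 *   #52 = 13 * 4 -> ARM_sp   (rewritten with the original sp via r3)
 *   #60 = 15 * 4 -> ARM_pc   (filled in by optimized_callback)
 *   #64 = 16 * 4 -> ARM_cpsr
 */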

#define TMPL_VAL_IDX \
	((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry)
#define TMPL_END_IDX \
	((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry)
#define TMPL_ADD_SP \
	((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_SUB_SP \
	((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
	((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
	((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_END \
	((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry)
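
/*
 * These are word indices, not byte offsets: e.g. code[TMPL_VAL_IDX]
 * below patches the '1: .long 0' slot of the copied template with a
 * pointer to the optimized_kprobe, which the template then loads into
 * r0 via 'ldr r0, 1f'.
 */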

/*
 * ARM can always optimize an instruction when using the ARM ISA, except
 * for instructions like 'str r0, [sp, r1]', which store to the stack
 * and whose stack space consumption cannot be determined statically.
 */
int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * In the ARM ISA, kprobe opt always replaces one instruction (4 bytes
 * aligned and 4 bytes long). It is impossible to encounter another
 * kprobe in the address range. So always return 0.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

/* Caller must ensure addr & 3 == 0 */
static int can_optimize(struct kprobe *kp)
{
	if (kp->ainsn.stack_space < 0)
		return 0;
	/*
	 * 255 is the biggest immediate that can be used in
	 * 'sub r0, r0, #<imm>'; numbers larger than 255 need
	 * special encoding.
	 */
	if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs))
		return 0;
	return 1;
}
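
/*
 * A worked example (assuming the usual 18-word pt_regs, i.e. 72
 * bytes): the template must fit both pt_regs and the instruction's
 * scratch area below the old sp, so stack_space may be at most
 * 255 - 72 = 183 bytes before optimization is refused.
 */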

/* Free optimized instruction slot */
static void
__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
	}
}

extern void kprobe_handler(struct pt_regs *regs);

static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	unsigned long flags;
	struct kprobe *p = &op->kp;
	struct kprobe_ctlblk *kcb;

	/* Save skipped registers */
	regs->ARM_pc = (unsigned long)op->kp.addr;
	regs->ARM_ORIG_r0 = ~0UL;
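
	/*
	 * Note (an inference from ARM pt_regs conventions, not stated in
	 * this file): ARM_ORIG_r0 only carries meaning on syscall entry,
	 * and ~0UL is the conventional "not a syscall" marker, so
	 * syscall-restart logic never mistakes this trap for an
	 * interrupted syscall.
	 */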

	local_irq_save(flags);
	kcb = get_kprobe_ctlblk();

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	/*
	 * We singlestep the replaced instruction only when it can't be
	 * executed directly during restore.
	 */
	if (!p->ainsn.kprobe_direct_exec)
		op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);

	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback)

int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
{
	kprobe_opcode_t *code;
	unsigned long rel_chk;
	unsigned long val;
	unsigned long stack_protect = sizeof(struct pt_regs);

	if (!can_optimize(orig))
		return -EILSEQ;

	code = get_optinsn_slot();
	if (!code)
		return -ENOMEM;

	/*
	 * Verify that the address gap is within the ±32 MiB range
	 * reachable by a relative jump.
	 *
	 * kprobe opt uses a 'b' instruction to branch to optinsn.insn.
	 * According to the ARM manual, the branch instruction is:
	 *
	 *   31  28 27 26 25 24 23                      0
	 *   +------+--+--+--+--+------------------------+
	 *   | cond | 1| 0| 1| 0|         imm24          |
	 *   +------+--+--+--+--+------------------------+
	 *
	 * imm24 is a signed 24-bit integer. The real branch offset is
	 * computed by: imm32 = SignExtend(imm24:'00', 32);
	 *
	 * So the maximum forward branch should be:
	 *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
	 * The maximum backward branch should be:
	 *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
	 *
	 * We can simply check (rel & 0xfe000003):
	 *   if rel is positive, (rel & 0xfe000000) should be 0
	 *   if rel is negative, (rel & 0xfe000000) should be 0xfe000000
	 * the last '3' is used for alignment checking.
	 */
	rel_chk = (unsigned long)((long)code -
			(long)orig->addr + 8) & 0xfe000003;
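
	/*
	 * Quick sanity check of the mask (illustrative numbers): at the
	 * maximum forward gap, rel = 0x01fffffc and
	 * rel & 0xfe000003 == 0; one word further, rel = 0x02000000,
	 * the AND yields a non-zero value and the slot is rejected.
	 */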

	if ((rel_chk != 0) && (rel_chk != 0xfe000000)) {
		/*
		 * Unlike x86, we free the code buffer directly instead
		 * of calling __arch_remove_optimized_kprobe(), because
		 * we have not filled in any field of op yet.
		 */
		free_optinsn_slot(code, 0);
		return -ERANGE;
	}

	/* Copy arch-dep-instance from template. */
	memcpy(code, (unsigned long *)optprobe_template_entry,
			TMPL_END_IDX * sizeof(kprobe_opcode_t));

	/* Adjust buffer according to instruction. */
	BUG_ON(orig->ainsn.stack_space < 0);

	stack_protect += orig->ainsn.stack_space;

	/* Should have been filtered by can_optimize(). */
	BUG_ON(stack_protect > 255);

	/* Create a 'sub sp, sp, #<stack_protect>' */
	code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect);
	/* Create an 'add r3, sp, #<stack_protect>' */
	code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect);
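
	/*
	 * Encoding note: 0xe24dd000 is 'sub sp, sp, #0' and 0xe28d3000
	 * is 'add r3, sp, #0' (both condition AL); stack_protect <= 255
	 * drops straight into the imm8 field of the immediate, which is
	 * why can_optimize() capped it at 255.
	 */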

	/* Set probe information */
	val = (unsigned long)op;
	code[TMPL_VAL_IDX] = val;

	/* Set probe function call */
	val = (unsigned long)optimized_callback;
	code[TMPL_CALL_IDX] = val;

	/* If possible, copy insn and have it executed during restore */
	orig->ainsn.kprobe_direct_exec = false;
	if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) {
		kprobe_opcode_t final_branch = arm_gen_branch(
				(unsigned long)(&code[TMPL_RESTORE_END]),
				(unsigned long)(op->kp.addr) + 4);
		if (final_branch != 0) {
			/*
			 * Replace original 'ldmia sp, {r0 - r15}' with
			 * 'ldmia sp, {r0 - r14}', restoring all
			 * registers except pc.
			 */
			code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff);

			/* The original probed instruction */
			code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode);

			/* Jump back to next instruction */
			code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch);
			orig->ainsn.kprobe_direct_exec = true;
		}
	}
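
	/*
	 * To recap the two restore flavours (a summary, not new logic):
	 * when the probed insn never touches pc, the template restores
	 * r0-r14, runs the original insn inside the slot, and branches
	 * back to addr + 4; otherwise pc is reloaded from pt_regs, where
	 * optimized_callback has already single-stepped the insn.
	 */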

	flush_icache_range((unsigned long)code,
			   (unsigned long)(&code[TMPL_END_IDX]));

	/* Setting op->optinsn.insn marks this probe as prepared. */
	op->optinsn.insn = code;
	return 0;
}

void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		unsigned long insn;
		WARN_ON(kprobe_disabled(&op->kp));

		/*
		 * Back up the instruction that will be replaced
		 * by the jump.
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
				RELATIVEJUMP_SIZE);

		insn = arm_gen_branch((unsigned long)op->kp.addr,
				(unsigned long)op->optinsn.insn);
		BUG_ON(insn == 0);

		/*
		 * Make it a conditional branch if the replaced insn
		 * is conditional.
		 */
		insn = (__mem_to_opcode_arm(
			op->optinsn.copied_insn[0]) & 0xf0000000) |
			(insn & 0x0fffffff);
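
		/*
		 * Example (hypothetical probed insn): if the displaced
		 * instruction was 'addeq r0, r0, #1' (cond EQ = 0x0),
		 * the unconditional branch 0xea?????? becomes
		 * 0x0a??????, i.e. 'beq' to the optinsn slot, so the
		 * original condition still gates entry to the probe.
		 */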

		/*
		 * Similar to __arch_disarm_kprobe, operations which
		 * remove breakpoints must be wrapped by stop_machine
		 * to avoid racing.
		 */
		kprobes_remove_breakpoint(op->kp.addr, insn);

		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}