// SPDX-License-Identifier: GPL-2.0

/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <[email protected]>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPUs
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: /* we don't need output */
		: "a" (loops)
	);
}
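
/*
 * A rough C sketch of what the asm above does (illustration only, not a
 * drop-in replacement: the point of hand-written asm plus the .align/jmp
 * padding is to pin down code placement, which the compiler would not
 * guarantee for plain C):
 *
 *	static void delay_loop_sketch(u64 loops)	(hypothetical)
 *	{
 *		while (loops--)
 *			;
 *	}
 */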

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}
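
/*
 * Worked example of the rebalancing above (hypothetical numbers): a
 * request for 1000 cycles that migrates after now - bclock = 600 cycles
 * continues with cycles = 400 against a fresh bclock read from the new
 * CPU's TSC. As the comment in the loop notes, this can only lengthen
 * the total wait, never shorten it below the request.
 */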

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}
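
/*
 * Illustration of the EDX:EAX split above (hypothetical values): with
 * start = 0x200000000 and cycles = 0x100, until = 0x200000100, so
 * TPAUSE gets edx = 0x2 and eax = 0x100 and resumes once the TSC passes
 * that 64-bit deadline (or one of the other wake conditions fires).
 */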

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint; EAX=0xf means do not
	 * enter any deep C-state, and we use it here in delay() to
	 * minimize wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}
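
/*
 * Note on the min_t() clamp above: the MWAITX timer is only 32 bits
 * wide, so requests longer than MWAITX_MAX_WAIT_CYCLES wake up early.
 * That is fine, because delay_halt() below re-invokes the halt function
 * until the full cycle budget has actually elapsed.
 */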

/*
 * Call a vendor-specific function to delay for a given amount of time.
 * Because these functions may return earlier than requested, check for
 * actual elapsed time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * A timer value of 0 causes MWAITX to wait indefinitely, unless
	 * there is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}
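
/*
 * Worked trace of the retry loop (hypothetical numbers): for a request
 * of 1000 cycles where delay_halt_fn() wakes after end - start = 600
 * cycles (timer clamp, interrupt, ...), the loop deducts 600, slides
 * start forward to end, and re-enters to wait out the remaining 400
 * cycles.
 */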

void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}
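
/*
 * Selection order of the setters above: delay_loop is the compile-time
 * default until boot calibration runs; use_tsc_delay() only upgrades
 * from that default, so a platform that already installed the TPAUSE or
 * MWAITX halt path is not downgraded to the plain TSC loop.
 */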

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		: "=d" (xloops), "=&a" (d0)
		: "1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
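
/*
 * The fixed-point math above, spelled out: __udelay() below passes in
 * usecs * 0x10c7, where 0x10c7 ~= 2^32 / 10^6. The mull leaves the high
 * 32 bits of xloops * 4 * (lpj * HZ / 4) in EDX, i.e. approximately
 *
 *	loops = usecs * (2^32 / 10^6) * lpj * HZ / 2^32
 *	      = usecs * lpj * HZ / 10^6
 *
 * which is the loop count for usecs microseconds, lpj * HZ being loops
 * per second. The *4 and HZ/4 split presumably keeps lpj * (HZ / 4)
 * within 32 bits, and the ++xloops rounds up so the delay is never
 * shorter than requested.
 */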

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
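
/*
 * Typical use from driver code goes through the udelay()/ndelay()
 * macros in <linux/delay.h>, e.g. udelay(10) to spin for at least ten
 * microseconds; on x86 those macros bottom out in __udelay() and
 * __const_udelay() above.
 */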

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);