forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
472 lines
11 KiB
472 lines
11 KiB
/* SPDX-License-Identifier: GPL-2.0 */ |
|
.file "wm_sqrt.S" |
|
/*---------------------------------------------------------------------------+ |
|
| wm_sqrt.S | |
|
| | |
|
| Fixed point arithmetic square root evaluation. | |
|
| | |
|
| Copyright (C) 1992,1993,1995,1997 | |
|
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
|
| Australia. E-mail billm@suburbia.net | |
|
| | |
|
| Call from C as: | |
|
| int wm_sqrt(FPU_REG *n, unsigned int control_word) | |
|
| | |
|
+---------------------------------------------------------------------------*/ |
|
|
|
/*---------------------------------------------------------------------------+ |
|
| wm_sqrt(FPU_REG *n, unsigned int control_word) | |
|
| returns the square root of n in n. | |
|
| | |
|
| Use Newton's method to compute the square root of a number, which must | |
|
| be in the range [1.0 .. 4.0), to 64 bits accuracy. | |
|
| Does not check the sign or tag of the argument. | |
|
| Sets the exponent, but not the sign or tag of the result. | |
|
| | |
|
| The guess is kept in %esi:%edi | |
|
+---------------------------------------------------------------------------*/ |
|
|
|
#include "exception.h" |
|
#include "fpu_emu.h" |
|
|
|
|
|
#ifndef NON_REENTRANT_FPU |
|
/* Local storage on the stack: */ |
|
#define FPU_accum_3 -4(%ebp) /* ms word */ |
|
#define FPU_accum_2 -8(%ebp) |
|
#define FPU_accum_1 -12(%ebp) |
|
#define FPU_accum_0 -16(%ebp) |
|
|
|
/* |
|
* The de-normalised argument: |
|
* sq_2 sq_1 sq_0 |
|
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
|
* ^ binary point here |
|
*/ |
|
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ |
|
#define FPU_fsqrt_arg_1 -24(%ebp) |
|
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ |
|
|
|
#else |
|
/* Local storage in a static area: */ |
|
.data |
|
.align 4,0 |
|
FPU_accum_3: |
|
.long 0 /* ms word */ |
|
FPU_accum_2: |
|
.long 0 |
|
FPU_accum_1: |
|
.long 0 |
|
FPU_accum_0: |
|
.long 0 |
|
|
|
/* The de-normalised argument: |
|
sq_2 sq_1 sq_0 |
|
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
|
^ binary point here |
|
*/ |
|
FPU_fsqrt_arg_2: |
|
.long 0 /* ms word */ |
|
FPU_fsqrt_arg_1: |
|
.long 0 |
|
FPU_fsqrt_arg_0: |
|
.long 0 /* ls word, at most the ms bit is set */ |
|
#endif /* NON_REENTRANT_FPU */ |
|
|
|
|
|
.text |
|
SYM_FUNC_START(wm_sqrt) |
|
pushl %ebp |
|
movl %esp,%ebp |
|
#ifndef NON_REENTRANT_FPU |
|
subl $28,%esp |
|
#endif /* NON_REENTRANT_FPU */ |
|
pushl %esi |
|
pushl %edi |
|
pushl %ebx |
|
|
|
movl PARAM1,%esi |
|
|
|
movl SIGH(%esi),%eax |
|
movl SIGL(%esi),%ecx |
|
xorl %edx,%edx |
|
|
|
/* We use a rough linear estimate for the first guess.. */ |
|
|
|
cmpw EXP_BIAS,EXP(%esi) |
|
jnz sqrt_arg_ge_2 |
|
|
|
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ |
|
rcrl $1,%ecx |
|
rcrl $1,%edx |
|
|
|
sqrt_arg_ge_2: |
|
/* From here on, n is never accessed directly again until it is |
|
replaced by the answer. */ |
|
|
|
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ |
|
movl %ecx,FPU_fsqrt_arg_1 |
|
movl %edx,FPU_fsqrt_arg_0 |
|
|
|
/* Make a linear first estimate */ |
|
shrl $1,%eax |
|
addl $0x40000000,%eax |
|
movl $0xaaaaaaaa,%ecx |
|
mull %ecx |
|
shll %edx /* max result was 7fff... */ |
|
testl $0x80000000,%edx /* but min was 3fff... */ |
|
jnz sqrt_prelim_no_adjust |
|
|
|
movl $0x80000000,%edx /* round up */ |
|
|
|
sqrt_prelim_no_adjust: |
|
movl %edx,%esi /* Our first guess */ |
|
|
|
/* We have now computed (approx) (2 + x) / 3, which forms the basis |
|
for a few iterations of Newton's method */ |
|
|
|
movl FPU_fsqrt_arg_2,%ecx /* ms word */ |
|
|
|
/* |
|
* From our initial estimate, three iterations are enough to get us |
|
* to 30 bits or so. This will then allow two iterations at better |
|
* precision to complete the process. |
|
*/ |
|
|
|
/* Compute (g + n/g)/2 at each iteration (g is the guess). */ |
|
shrl %ecx /* Doing this first will prevent a divide */ |
|
/* overflow later. */ |
|
|
|
movl %ecx,%edx /* msw of the arg / 2 */ |
|
divl %esi /* current estimate */ |
|
shrl %esi /* divide by 2 */ |
|
addl %eax,%esi /* the new estimate */ |
|
|
|
movl %ecx,%edx |
|
divl %esi |
|
shrl %esi |
|
addl %eax,%esi |
|
|
|
movl %ecx,%edx |
|
divl %esi |
|
shrl %esi |
|
addl %eax,%esi |
|
|
|
/* |
|
* Now that an estimate accurate to about 30 bits has been obtained (in %esi), |
|
* we improve it to 60 bits or so. |
|
* |
|
* The strategy from now on is to compute new estimates from |
|
* guess := guess + (n - guess^2) / (2 * guess) |
|
*/ |
|
|
|
/* First, find the square of the guess */ |
|
movl %esi,%eax |
|
mull %esi |
|
/* guess^2 now in %edx:%eax */ |
|
|
|
movl FPU_fsqrt_arg_1,%ecx |
|
subl %ecx,%eax |
|
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ |
|
sbbl %ecx,%edx |
|
jnc sqrt_stage_2_positive |
|
|
|
/* Subtraction gives a negative result, |
|
negate the result before division. */ |
|
notl %edx |
|
notl %eax |
|
addl $1,%eax |
|
adcl $0,%edx |
|
|
|
divl %esi |
|
movl %eax,%ecx |
|
|
|
movl %edx,%eax |
|
divl %esi |
|
jmp sqrt_stage_2_finish |
|
|
|
sqrt_stage_2_positive: |
|
divl %esi |
|
movl %eax,%ecx |
|
|
|
movl %edx,%eax |
|
divl %esi |
|
|
|
notl %ecx |
|
notl %eax |
|
addl $1,%eax |
|
adcl $0,%ecx |
|
|
|
sqrt_stage_2_finish: |
|
sarl $1,%ecx /* divide by 2 */ |
|
rcrl $1,%eax |
|
|
|
/* Form the new estimate in %esi:%edi */ |
|
movl %eax,%edi |
|
addl %ecx,%esi |
|
|
|
jnz sqrt_stage_2_done /* result should be [1..2) */ |
|
|
|
#ifdef PARANOID |
|
/* It should be possible to get here only if the arg is ffff....ffff */ |
|
cmpl $0xffffffff,FPU_fsqrt_arg_1 |
|
jnz sqrt_stage_2_error |
|
#endif /* PARANOID */ |
|
|
|
/* The best rounded result. */ |
|
xorl %eax,%eax |
|
decl %eax |
|
movl %eax,%edi |
|
movl %eax,%esi |
|
movl $0x7fffffff,%eax |
|
jmp sqrt_round_result |
|
|
|
#ifdef PARANOID |
|
sqrt_stage_2_error: |
|
pushl EX_INTERNAL|0x213 |
|
call EXCEPTION |
|
#endif /* PARANOID */ |
|
|
|
sqrt_stage_2_done: |
|
|
|
/* Now the square root has been computed to better than 60 bits. */ |
|
|
|
/* Find the square of the guess. */ |
|
movl %edi,%eax /* ls word of guess */ |
|
mull %edi |
|
movl %edx,FPU_accum_1 |
|
|
|
movl %esi,%eax |
|
mull %esi |
|
movl %edx,FPU_accum_3 |
|
movl %eax,FPU_accum_2 |
|
|
|
movl %edi,%eax |
|
mull %esi |
|
addl %eax,FPU_accum_1 |
|
adcl %edx,FPU_accum_2 |
|
adcl $0,FPU_accum_3 |
|
|
|
/* movl %esi,%eax */ |
|
/* mull %edi */ |
|
addl %eax,FPU_accum_1 |
|
adcl %edx,FPU_accum_2 |
|
adcl $0,FPU_accum_3 |
|
|
|
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ |
|
|
|
movl FPU_fsqrt_arg_0,%eax /* get normalized n */ |
|
subl %eax,FPU_accum_1 |
|
movl FPU_fsqrt_arg_1,%eax |
|
sbbl %eax,FPU_accum_2 |
|
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ |
|
sbbl %eax,FPU_accum_3 |
|
jnc sqrt_stage_3_positive |
|
|
|
/* Subtraction gives a negative result, |
|
negate the result before division */ |
|
notl FPU_accum_1 |
|
notl FPU_accum_2 |
|
notl FPU_accum_3 |
|
addl $1,FPU_accum_1 |
|
adcl $0,FPU_accum_2 |
|
|
|
#ifdef PARANOID |
|
adcl $0,FPU_accum_3 /* This must be zero */ |
|
jz sqrt_stage_3_no_error |
|
|
|
sqrt_stage_3_error: |
|
pushl EX_INTERNAL|0x207 |
|
call EXCEPTION |
|
|
|
sqrt_stage_3_no_error: |
|
#endif /* PARANOID */ |
|
|
|
movl FPU_accum_2,%edx |
|
movl FPU_accum_1,%eax |
|
divl %esi |
|
movl %eax,%ecx |
|
|
|
movl %edx,%eax |
|
divl %esi |
|
|
|
sarl $1,%ecx /* divide by 2 */ |
|
rcrl $1,%eax |
|
|
|
/* prepare to round the result */ |
|
|
|
addl %ecx,%edi |
|
adcl $0,%esi |
|
|
|
jmp sqrt_stage_3_finished |
|
|
|
sqrt_stage_3_positive: |
|
movl FPU_accum_2,%edx |
|
movl FPU_accum_1,%eax |
|
divl %esi |
|
movl %eax,%ecx |
|
|
|
movl %edx,%eax |
|
divl %esi |
|
|
|
sarl $1,%ecx /* divide by 2 */ |
|
rcrl $1,%eax |
|
|
|
/* prepare to round the result */ |
|
|
|
notl %eax /* Negate the correction term */ |
|
notl %ecx |
|
addl $1,%eax |
|
adcl $0,%ecx /* carry here ==> correction == 0 */ |
|
adcl $0xffffffff,%esi |
|
|
|
addl %ecx,%edi |
|
adcl $0,%esi |
|
|
|
sqrt_stage_3_finished: |
|
|
|
/* |
|
* The result in %esi:%edi:%esi should be good to about 90 bits here, |
|
* and the rounding information here does not have sufficient accuracy |
|
* in a few rare cases. |
|
*/ |
|
cmpl $0xffffffe0,%eax |
|
ja sqrt_near_exact_x |
|
|
|
cmpl $0x00000020,%eax |
|
jb sqrt_near_exact |
|
|
|
cmpl $0x7fffffe0,%eax |
|
jb sqrt_round_result |
|
|
|
cmpl $0x80000020,%eax |
|
jb sqrt_get_more_precision |
|
|
|
sqrt_round_result: |
|
/* Set up for rounding operations */ |
|
movl %eax,%edx |
|
movl %esi,%eax |
|
movl %edi,%ebx |
|
movl PARAM1,%edi |
|
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ |
|
jmp fpu_reg_round |
|
|
|
|
|
sqrt_near_exact_x: |
|
/* First, the estimate must be rounded up. */ |
|
addl $1,%edi |
|
adcl $0,%esi |
|
|
|
sqrt_near_exact: |
|
/* |
|
* This is an easy case because x^1/2 is monotonic. |
|
* We need just find the square of our estimate, compare it |
|
* with the argument, and deduce whether our estimate is |
|
* above, below, or exact. We use the fact that the estimate |
|
* is known to be accurate to about 90 bits. |
|
*/ |
|
movl %edi,%eax /* ls word of guess */ |
|
mull %edi |
|
movl %edx,%ebx /* 2nd ls word of square */ |
|
movl %eax,%ecx /* ls word of square */ |
|
|
|
movl %edi,%eax |
|
mull %esi |
|
addl %eax,%ebx |
|
addl %eax,%ebx |
|
|
|
#ifdef PARANOID |
|
cmp $0xffffffb0,%ebx |
|
jb sqrt_near_exact_ok |
|
|
|
cmp $0x00000050,%ebx |
|
ja sqrt_near_exact_ok |
|
|
|
pushl EX_INTERNAL|0x214 |
|
call EXCEPTION |
|
|
|
sqrt_near_exact_ok: |
|
#endif /* PARANOID */ |
|
|
|
or %ebx,%ebx |
|
js sqrt_near_exact_small |
|
|
|
jnz sqrt_near_exact_large |
|
|
|
or %ebx,%edx |
|
jnz sqrt_near_exact_large |
|
|
|
/* Our estimate is exactly the right answer */ |
|
xorl %eax,%eax |
|
jmp sqrt_round_result |
|
|
|
sqrt_near_exact_small: |
|
/* Our estimate is too small */ |
|
movl $0x000000ff,%eax |
|
jmp sqrt_round_result |
|
|
|
sqrt_near_exact_large: |
|
/* Our estimate is too large, we need to decrement it */ |
|
subl $1,%edi |
|
sbbl $0,%esi |
|
movl $0xffffff00,%eax |
|
jmp sqrt_round_result |
|
|
|
|
|
sqrt_get_more_precision: |
|
/* This case is almost the same as the above, except we start |
|
with an extra bit of precision in the estimate. */ |
|
stc /* The extra bit. */ |
|
rcll $1,%edi /* Shift the estimate left one bit */ |
|
rcll $1,%esi |
|
|
|
movl %edi,%eax /* ls word of guess */ |
|
mull %edi |
|
movl %edx,%ebx /* 2nd ls word of square */ |
|
movl %eax,%ecx /* ls word of square */ |
|
|
|
movl %edi,%eax |
|
mull %esi |
|
addl %eax,%ebx |
|
addl %eax,%ebx |
|
|
|
/* Put our estimate back to its original value */ |
|
stc /* The ms bit. */ |
|
rcrl $1,%esi /* Shift the estimate left one bit */ |
|
rcrl $1,%edi |
|
|
|
#ifdef PARANOID |
|
cmp $0xffffff60,%ebx |
|
jb sqrt_more_prec_ok |
|
|
|
cmp $0x000000a0,%ebx |
|
ja sqrt_more_prec_ok |
|
|
|
pushl EX_INTERNAL|0x215 |
|
call EXCEPTION |
|
|
|
sqrt_more_prec_ok: |
|
#endif /* PARANOID */ |
|
|
|
or %ebx,%ebx |
|
js sqrt_more_prec_small |
|
|
|
jnz sqrt_more_prec_large |
|
|
|
or %ebx,%ecx |
|
jnz sqrt_more_prec_large |
|
|
|
/* Our estimate is exactly the right answer */ |
|
movl $0x80000000,%eax |
|
jmp sqrt_round_result |
|
|
|
sqrt_more_prec_small: |
|
/* Our estimate is too small */ |
|
movl $0x800000ff,%eax |
|
jmp sqrt_round_result |
|
|
|
sqrt_more_prec_large: |
|
/* Our estimate is too large */ |
|
movl $0x7fffff00,%eax |
|
jmp sqrt_round_result |
|
SYM_FUNC_END(wm_sqrt)
|
|
|