mirror of https://github.com/Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
2.5 KiB
93 lines
2.5 KiB
/* SPDX-License-Identifier: GPL-2.0-only */ |
|
/* |
|
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) |
|
*/ |
|
|
|
/* This is optimized primarily for the ARC700. |
|
It would be possible to speed up the loops by one cycle / word |
|
(respectively one cycle / byte) by forcing double source 1 alignment, unrolling |
|
by a factor of two, and speculatively loading the second word / byte of |
|
source 1; however, that would increase the overhead for loop setup / finish, |
|
and strcmp might often terminate early. */ |
|
|
|
#include <linux/linkage.h> |
|
|
|
/*
 * strcmp: compare two NUL-terminated byte strings.
 *
 * In:   r0, r1 - addresses of the two strings (both are read below with
 *       post-incrementing loads).
 * Out:  r0 - zero when no difference is found up to the terminator,
 *       positive when the first differing byte of the r0 string is
 *       larger (unsigned compare), negative (bit 31 set) when smaller.
 * Uses: r2, r3, r4, r5 and r12 as scratch; r0 and r1 are modified.
 *       Returns through blink.
 *
 * When the low two bits of both addresses are zero, the strings are
 * compared one 32-bit word at a time (.Lwordloop); otherwise one byte
 * at a time (.Lcharloop).
 */
ENTRY_CFI(strcmp) |
|
/* r2 = low two bits of (r0 | r1); non-zero means at least one address
   is not 4-byte aligned, so take the byte loop. */
or r2,r0,r1 |
|
bmsk_s r2,r2,1 |
|
brne r2,0,.Lcharloop |
|
/* Constants for the zero-byte test: r12 = 0x01010101 and
   r5 = r12 rotated right by one bit = 0x80808080. */
mov_s r12,0x01010101 |
|
ror r5,r12 |
|
.Lwordloop: |
|
/* Load one word from each string and advance both addresses by 4. */
ld.ab r2,[r0,4] |
|
ld.ab r3,[r1,4] |
|
/* NOTE(review): nop_s presumably pads the load-to-use distance or keeps
   the following code aligned -- confirm before removing. */
nop_s |
|
/* r4 = (r2 - 0x01010101) & ~r2 & 0x80808080, which is non-zero when the
   word from the first string contains a zero byte. */
sub r4,r2,r12 |
|
bic r4,r4,r2 |
|
and r4,r4,r5 |
|
brne r4,0,.Lfound0 |
|
/* No zero byte flagged: keep looping while the two words are equal. */
breq r2,r3,.Lwordloop |
|
/* Fall through: the words differ and no zero byte was flagged in r2.
   Little-endian builds first reduce both words to the least significant
   differing byte; big-endian builds compare the words as loaded. */
#ifdef __LITTLE_ENDIAN__ |
|
xor r0,r2,r3 ; mask for difference |
|
sub_s r1,r0,1 |
|
bic_s r0,r0,r1 ; mask for least significant difference bit |
|
sub r1,r5,r0 |
|
xor r0,r5,r1 ; mask for least significant difference byte |
|
and_s r2,r2,r0 |
|
and_s r3,r3,r0 |
|
#endif /* LITTLE ENDIAN */ |
|
/* Return 1; the delay-slot instruction additionally sets bit 31 (making
   the result negative) when r2 is unsigned-lower than r3. */
cmp_s r2,r3 |
|
mov_s r0,1 |
|
j_s.d [blink] |
|
bset.lo r0,r0,31 |
|
|
|
.balign 4 |
|
#ifdef __LITTLE_ENDIAN__ |
|
/* A zero byte was flagged in r2. Same byte isolation as in the
   fall-through path above, except that the zero indicator in r4 is merged
   into the difference mask, so the selected byte is the least significant
   one that either differs or is flagged as zero. */
.Lfound0: |
|
xor r0,r2,r3 ; mask for difference |
|
or r0,r0,r4 ; or in zero indicator |
|
sub_s r1,r0,1 |
|
bic_s r0,r0,r1 ; mask for least significant difference bit |
|
sub r1,r5,r0 |
|
xor r0,r5,r1 ; mask for least significant difference byte |
|
and_s r2,r2,r0 |
|
and_s r3,r3,r0 |
|
/* r0 = r2 - r3 of the isolated bytes (0 when they are equal); replaced
   by 1 when r2 is unsigned-higher, and bit 31 is set in the delay slot
   when r2 is unsigned-lower. */
sub.f r0,r2,r3 |
|
mov.hi r0,1 |
|
j_s.d [blink] |
|
bset.lo r0,r0,31 |
|
#else /* BIG ENDIAN */ |
|
/* The zero-detection above can mis-detect 0x01 bytes as zeroes |
|
because of carry propagation from a lower significant zero byte. |
|
We can compensate for this by checking that bit0 is zero. |
|
This compensation is not necessary in the step where we |
|
get a low estimate for r2, because in any affected bytes |
|
we already have 0x00 or 0x01, which will remain unchanged |
|
when bit 7 is cleared. */ |
|
.balign 4 |
|
.Lfound0: |
|
lsr r0,r4,8 |
|
lsr_s r1,r2 |
|
bic_s r2,r2,r0 ; get low estimate for r2 and get ... |
|
bic_s r0,r0,r1 ; <this is the adjusted mask for zeros> |
|
or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... |
|
cmp_s r3,r2 ; ... be independent of trailing garbage |
|
or_s r2,r2,r0 ; likewise for r3 > r2 |
|
bic_s r3,r3,r0 |
|
rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 |
|
/* Second compare feeds the delay-slot bset.lo, which sets bit 31 of the
   result when r2 is unsigned-lower than r3. */
cmp_s r2,r3 |
|
j_s.d [blink] |
|
bset.lo r0,r0,31 |
|
#endif /* ENDIAN */ |
|
|
|
.balign 4 |
|
/* Byte-at-a-time path: load one byte from each string, advancing both
   addresses by 1. The loop ends on a zero byte in the first string or on
   the first mismatch. */
.Lcharloop: |
|
ldb.ab r2,[r0,1] |
|
ldb.ab r3,[r1,1] |
|
/* NOTE(review): nop_s presumably pads the load-to-use distance -- confirm. */
nop_s |
|
breq r2,0,.Lcmpend |
|
breq r2,r3,.Lcharloop |
|
/* Return the difference of the last two bytes loaded; the subtraction
   executes in the jump delay slot. */
.Lcmpend: |
|
j_s.d [blink] |
|
sub r0,r2,r3 |
|
END_CFI(strcmp)
|
|
|