mirror of https://github.com/Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
143 lines
2.7 KiB
143 lines
2.7 KiB
/* SPDX-License-Identifier: GPL-2.0-only */ |
|
/* |
|
* Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) |
|
*/ |
|
|
|
#include <linux/linkage.h> |
|
#include <asm/cache.h> |
|
|
|
/* |
|
* The memset implementation below is optimized to use the prefetchw and prealloc |
|
* instructions on CPUs with a 64B L1 data cache line (L1_CACHE_SHIFT == 6). |
|
* If you want to implement an optimized memset for other possible L1 data cache |
|
* line lengths (32B and 128B), you should rewrite the code, carefully checking |
|
* that we don't call any prefetchw/prealloc instruction for L1 cache lines which |
|
* don't belong to the memset area. |
|
*/ |
|
|
|
;; Cache-hint wrapper macros used by memset.
;; With L1_CACHE_SHIFT == 6 each macro emits exactly one instruction on the
;; address [reg, off]. With any other value both macros expand to nothing,
;; so the call sites need no conditionals of their own.
#if L1_CACHE_SHIFT == 6 |
|
|
|
;; PREALLOC_INSTR reg, off: emits "prealloc [reg, off]"
;; (used by memset to allocate the next line without fetching it).
.macro PREALLOC_INSTR reg, off |
|
prealloc [\reg, \off] |
|
.endm |
|
|
|
;; PREFETCHW_INSTR reg, off: emits "prefetchw [reg, off]"
;; (used by memset to prefetch the first location it will write).
.macro PREFETCHW_INSTR reg, off |
|
prefetchw [\reg, \off] |
|
.endm |
|
|
|
#else |
|
|
|
;; Any other cache line size: same macro names, empty bodies (no code emitted).
.macro PREALLOC_INSTR reg, off |
|
.endm |
|
|
|
.macro PREFETCHW_INSTR reg, off |
|
.endm |
|
|
|
#endif |
|
|
|
;; memset - fill a memory area with one byte value.
;;
;; In:   r0 = destination, r1 = fill value (only the low byte is stored),
;;       r2 = length in bytes
;; Out:  r0 is never written, so it still holds the destination on return
;; Uses: r1-r5, lp_count and the status flags may be modified
;;
;; Phases: byte stores until the working pointer is 4-byte aligned, a 64-byte
;; unrolled loop, a 32-byte unrolled loop, then a byte loop for the rest.
;; Lengths of 8 or less skip straight to the final byte loop.
ENTRY_CFI(memset) |
|
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location |
|
;; Set the flags from the length; the destination operand 0 discards the value.
mov.f 0, r2 |
|
;;; if size is zero, return immediately |
|
jz.d [blink] |
|
mov r3, r0 ; delay slot (runs either way): r3 = working pointer, don't clobber ret val in r0 |
|
|
|
;;; if length <= 8 (brls = lower or same), go straight to the byte loop |
|
brls.d.nt r2, 8, .Lsmallchunk |
|
;; Delay slot: lp_count = length and Z reflects it, ready for the lpnz at
;; .Lsmallchunk. On the fall-through path both are overwritten just below.
mov.f lp_count,r2 |
|
|
|
;; r4 = destination & 3; Z is set when the destination is already aligned.
and.f r4, r0, 0x03 |
|
;; lp_count = 4 - r4 = number of bytes up to the next 4-byte boundary.
rsub lp_count, r4, 4 |
|
;; Zero-overhead loop up to .Laligndestination, skipped entirely when Z is set.
lpnz @.Laligndestination |
|
;; LOOP BEGIN: store one byte (r3 post-incremented), one byte less to go |
|
stb.ab r1, [r3,1] |
|
sub r2, r2, 1 |
|
.Laligndestination: |
|
|
|
;;; Destination is aligned: replicate the fill byte into every byte of r4 and r5 |
|
and r1, r1, 0xFF |
|
asl r4, r1, 8 |
|
or r4, r4, r1 |
|
asl r5, r4, 16 |
|
or r5, r5, r4 |
|
mov r4, r5 |
|
|
|
;; Split the remaining length r2 between the 64-byte loop and the tail:
;;   r2 >  64: lp_count = r2 - 64, r2 = (r2 & 63) + 64  (64..127 bytes held back)
;;   r2 <= 64: lp_count = 0,       r2 unchanged
;; sub3/add3 shift their last operand left by 3 (8 << 3 = 64); bmsk keeps bits 5..0.
;; Only cmp sets the flags here, so .hi/.ls all refer to the r2-versus-64 compare.
;; NOTE(review): holding back at least 64 bytes presumably guarantees that the
;; PREALLOC of [r3, 64] in the loop below only touches lines inside the buffer.
sub3 lp_count, r2, 8 |
|
cmp r2, 64 |
|
bmsk.hi r2, r2, 5 |
|
mov.ls lp_count, 0 |
|
add3.hi r2, r2, 8 |
|
|
|
;;; lp_count = number of 64-byte iterations; Z is set when there are none |
|
lsr.f lp_count, lp_count, 6 |
|
|
|
lpnz @.Lset64bytes |
|
;; LOOP START: one 64-byte chunk per iteration |
|
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching |
|
|
|
;; 64 bytes of stores, r3 post-incremented by each one: 8 x std (8 bytes each)
;; when CONFIG_ARC_HAS_LL64 is defined, otherwise 16 x st (4 bytes each).
;; NOTE(review): std with r4 presumably writes the r4/r5 register pair, which
;; is why both registers were loaded with the pattern above.
#ifdef CONFIG_ARC_HAS_LL64 |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
#else |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
#endif |
|
.Lset64bytes: |
|
|
|
lsr.f lp_count, r2, 5 ;At most 127 bytes remain: lp_count = number of 32-byte chunks (0..3) |
|
lpnz .Lset32bytes |
|
;; LOOP START: one 32-byte chunk per iteration (4 x std or 8 x st) |
|
#ifdef CONFIG_ARC_HAS_LL64 |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
std.ab r4, [r3, 8] |
|
#else |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
st.ab r4, [r3, 4] |
|
#endif |
|
.Lset32bytes: |
|
|
|
and.f lp_count, r2, 0x1F ;Last remaining 0..31 bytes; Z is set when none are left |
|
;; Shared byte loop. Reached by fall-through with lp_count = r2 & 0x1F, or
;; from the short-length branch with lp_count = length (1..8) as set in the
;; brls delay slot.
.Lsmallchunk: |
|
lpnz .Lcopy3bytes |
|
;; LOOP START: one byte per iteration |
|
stb.ab r1, [r3, 1] |
|
;; End of the byte loop (despite its name, the label is just the loop end marker).
.Lcopy3bytes: |
|
|
|
;; Return to the caller; r0 still holds the original destination.
j [blink] |
|
|
|
END_CFI(memset) |
|
|
|
;; memzero - zero a memory area by tail-calling memset.
;;
;; In: r0 = destination, r1 = length in bytes
;; r0 is passed through unchanged; r1 and r2 are rewritten to memset's
;; argument layout (r1 = fill value, r2 = length) before the branch.
ENTRY_CFI(memzero) |
|
; adjust bzero args to memset args: (ptr, len) -> (ptr, 0, len) |
|
;; Move the length to r2 first: r1 is overwritten in the delay slot below.
mov r2, r1 |
|
b.d memset ;tail call: a plain branch leaves blink alone, so no need to tinker with it; memset's "j [blink]" returns to our caller |
|
;; Delay slot of the branch above: fill value = 0.
mov r1, 0 |
|
END_CFI(memzero)
|
|
|