forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
4.3 KiB
219 lines
4.3 KiB
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
|
|
|
#include <linux/linkage.h> |
|
|
|
/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * In those paths the source is read with aligned 32-bit loads and each
 * output word is assembled from two neighbouring input words.  The bytes
 * that have been read but not yet stored are kept in a carry register.
 *
 *   SHIFT_1      shift the part of a freshly loaded word that completes the
 *                pending output word into place (it is then OR-ed with the
 *                carry).
 *   SHIFT_2      turn the rest of that word into the next carry.
 *   MERGE_1/2    position the first halfword / byte read in CASE 1 so they
 *                can be OR-ed into the initial carry.
 *   EXTRACT_1/2  pull the trailing halfword / byte out of the carry in
 *                CASE 1 so they can be stored with sth / stb.
 *
 * Little endian keeps the carry in the low bits, big endian in the high
 * bits, hence the swapped shift directions.  Note that some variants ignore
 * IMM: little-endian MERGE_2 expands to nothing, little-endian EXTRACT_1
 * uses a fixed 0xFFFF mask and big-endian EXTRACT_2 a fixed shift of 8.
 *
 * The text after ';' is an assembler comment that travels with the macro
 * body, so each macro must be the last thing on the line where it is used.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif
|
|
|
/*
 * Transfer unit for the fully aligned copy loop of memcpy.
 *
 *   LOADX / STOREX  load / store one unit through RX and post-increment RX
 *                   by the unit size: 8 bytes (ldd/std) when
 *                   CONFIG_ARC_HAS_LL64 is set, 4 bytes (ld/st) otherwise.
 *   ZOLSHFT         log2 of the bytes moved by one pass of the aligned loop,
 *                   which issues four LOADX/STOREX pairs (32 or 16 bytes).
 *   ZOLAND          mask selecting the bytes left over after that loop
 *                   (n & 0x1F or n & 0xF).
 *
 * NOTE(review): "ZOL" presumably stands for zero-overhead loop - confirm.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif
|
|
|
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:    r0 = dst, r1 = src, r2 = n (bytes)
 * Out:   r0 = dst (r0 is never written; r3 is the moving destination pointer)
 * Uses:  r1-r10, lp_count and the flags.  NOTE(review): when LOADX/STOREX
 *        are the 64-bit ldd/std forms each operand names a register pair,
 *        which would also touch r11 - confirm against the ISA manual.
 *
 * Outline:
 *   n == 0   return immediately.
 *   n <= 8   byte loop only.
 *   n >  8   copy single bytes until the destination is 32-bit aligned,
 *            then dispatch on (src & 3):
 *              0      CASE 0: unrolled LOADX/STOREX loop, byte loop for tail
 *              1,2,3  CASE 1-3: read the 3, 2 or 1 bytes that bring src to a
 *                     32-bit boundary into the carry register r5, then run
 *                     an 8-bytes-per-pass loop of aligned loads whose words
 *                     are shifted and OR-ed with the carry; finally flush
 *                     the carry and copy the last (n & 7) bytes one by one.
 *
 * Instructions with a .d suffix have a delay slot: the instruction that
 * follows them executes whether or not the branch is taken.  Several of
 * those slots do real work below, so statement order must not be changed.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; only sets the flags: Z when n == 0
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; delay slot: work on r3, do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; delay slot: lp_count = n, Z clear as n != 0

;;; Copy 4 - (dst & 3) bytes so that the destination becomes 32bit aligned
	and.f	r4, r0, 0x03		; Z set when dst is already aligned
	rsub	lp_count, r4, 4		; lp_count = 4 - r4
	lpnz	@.Laligndestination	; loop is skipped when Z is set
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Loop count = n >> ZOLSHFT, each pass moves four LOADX/STOREX units.
;;; The lsr.f below is the delay slot of the bnz.d above, so it also runs
;;; when the branch is taken; the unaligned paths reload lp_count.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; remaining n & ZOLAND bytes (at most 31 or 15)
.Lsmallchunk:
	;; Entered either by falling through or from the n <= 8 test; in both
	;; cases lp_count holds the byte count and Z tells whether it is zero.
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3 (1, 2 or 3).  Neither delay slot below alters the
	;; flags, so bhi.d still tests the result of this cmp.
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; delay slot: one byte accounted for in every case

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; delay slot: first carry byte for CASE 1 and CASE 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per pass
	;; Build the 3 byte carry in r5 from the byte and the halfword
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 8)

	SHIFT_1 (r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; last n & 7 bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	;; One halfword brings src to a 32bit boundary; it becomes the carry.
	;; With the delay slot above this accounts for 2 bytes in total.
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per pass
#ifdef __BIG_ENDIAN__
	;; Move the carry to the high half only if the loop is going to run
	;; (Z still comes from the lsr.f above).
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 16)

	SHIFT_1 (r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Nothing in the loop sets the flags, so this mirrors the asl.nz above
	;; and brings the carry back to the low half before it is stored.
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]

	and.f	lp_count, r2, 0x07	; last n & 7 bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
;;; (already done: the ldb.ab in the bhi.d delay slot loaded it into r5)

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3		; 8 bytes per pass
#ifdef __BIG_ENDIAN__
	;; Move the carry byte to the top only if the loop is going to run
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1 (r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2 (r5, r6, 24)

	SHIFT_1 (r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2 (r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Mirrors the asl.ne above: bring the carry byte back to the low bits
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; last n & 7 bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)
|
|
|