path: root/sys/src/libc/power/memmove.s
#define	BDNZ	BC	16,0,
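/*
 * BDNZ: decrement CTR and branch if the result is non-zero
 * (the BO=16, BI=0 form of the conditional branch)
 */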
	TEXT	memmove(SB), $0
	BR	move

	TEXT	memcpy(SB), $0
move:

/*
 * performance:
 * (tba)
 */
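/*
 * strategy:
 * copy forwards unless the destination overlaps the source from
 * above, in which case copy backwards from the end.  word-aligned
 * bulk moves go 16 bytes per iteration; short, trailing and
 * unaligned pieces use the string instructions (LSW/STSW) with
 * the byte count taken from XER.
 */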

	MOVW	R3, s1+0(FP)
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R3, R10			/* R10 is to-pointer */
	CMP	R9, $0
	BEQ	ret
	BLT	trap
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */
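/*
 * the destination (first argument, arriving in R3) was saved to
 * the frame above so that ret can return it
 */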

/*
 * if no more than 16 bytes, just use one lsw/stsw
 */
	CMP	R9, $16
	BLE	fout

	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * simplest overlap test: copy backwards if the
 * destination starts at a higher address than the source
 */
	CMPU	R10, R11
	BGT	back

/*
 * test whether both pointers have the same word alignment;
 * if the low bits differ they can never be word aligned at
 * the same time, so use the unaligned copy loop at fbad
 */
	XOR	R10,R11, R7
	ANDCC	$3,R7
	BNE	fbad

/*
 * move a few bytes to align pointers
 */
	ANDCC	$3,R10,R7
	BEQ	f2
	SUBC	R7, $4, R7		/* R7 = 4-R7, bytes up to the next word boundary */
	SUB	R7, R9
	MOVW	R7, XER			/* LSW/STSW take their byte count from XER */
	LSW	(R11), R16
	ADD	R7, R11
	STSW	R16, (R10)
	ADD	R7, R10

/*
 * turn R14 into a count of 16-byte blocks and
 * copy 16 bytes at a time while there's room.
 */
f2:
	SRAWCC	$4, R9, R14
	BLE	fout
	MOVW	R14, CTR
	SUB	$4, R11			/* bias pointers for the pre-incrementing MOVWU below */
	SUB	$4, R10
f3:
	MOVWU	4(R11), R16
	MOVWU	4(R11), R17
	MOVWU	4(R11), R18
	MOVWU	4(R11), R19
	MOVWU	R16, 4(R10)
	MOVWU	R17, 4(R10)
	MOVWU	R18, 4(R10)
	MOVWU	R19, 4(R10)
	BDNZ	f3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
	ADD	$4, R11			/* undo the bias before the tail copy */
	ADD	$4, R10

/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 */
fout:
	MOVW	R9, XER			/* remaining byte count */
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * loop for unaligned copy, then copy up to 15 remaining bytes
 */
fbad:
	SRAWCC	$4, R9, R14
	BLE	f6
	MOVW	R14, CTR
f5:
	LSW	(R11), $16, R16		/* 16 bytes into R16..R19, any alignment */
	ADD	$16, R11
	STSW	R16, $16, (R10)
	ADD	$16, R10
	BDNZ	f5
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
f6:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * the whole sequence repeated, copying backwards from the end;
 * short or differently-aligned moves drop straight into the
 * byte loop at bout
 */
back:
	CMP	R9, $4
	BLT	bout

	XOR	R12,R13, R7
	ANDCC	$3,R7
	BNE	bout
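/*
 * move bytes down from the end until the end pointers are word aligned
 */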
b1:
	ANDCC	$3,R13, R7
	BEQ	b2
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	SUB	$1, R9
	BR	b1
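/*
 * copy 16 bytes at a time, backwards; the MOVWU forms
 * pre-decrement both pointers by 4 on each access
 */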
b2:
	SRAWCC	$4, R9, R14
	BLE	b4
	MOVW	R14, CTR
b3:
	MOVWU	-4(R13), R16
	MOVWU	-4(R13), R17
	MOVWU	-4(R13), R18
	MOVWU	-4(R13), R19
	MOVWU	R16, -4(R12)
	MOVWU	R17, -4(R12)
	MOVWU	R18, -4(R12)
	MOVWU	R19, -4(R12)
	BDNZ	b3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
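/*
 * copy any remaining whole words, backwards
 */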
b4:
	SRAWCC	$2, R9, R14
	BLE	bout
	MOVW	R14, CTR
b5:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	BDNZ	b5
	RLWNMCC	$0, R9, $3, R9	/* residue */
	BEQ	ret
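/*
 * copy whatever is left a byte at a time, backwards,
 * until the start of the source is reached
 */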

bout:
	CMPU	R13, R11
	BLE	ret
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	BR	bout
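/*
 * a negative count is an error: force a fault by loading through nil
 */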

trap:
	MOVW	$0, R0
	MOVW	0(R0), R0

ret:
	MOVW	s1+0(FP), R3		/* memmove returns the destination */
	RETURN