summaryrefslogtreecommitdiff
path: root/sys/src/ape/lib/ap/sparc/memmove.s
blob: 8879a74e8a1eae7266ec5277771acc93acff7f2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
	TEXT	memmove(SB), $0		/* memmove entry point; it has no copy code of its own */
	JMP	move			/* continue at the move label that follows the memcpy TEXT */

	TEXT	memcpy(SB), $0
move:

/*
 * Shared body of memcpy and memmove (memmove jumps to move).
 * Copies n bytes from s2 to s1.  The copy runs backwards when
 * the to-pointer is above the from-pointer (unsigned compare)
 * and forwards otherwise.
 *
 * inputs:
 *	R7		to-pointer on entry; also stored in s1+0(FP)
 *	s2+4(FP)	from-pointer
 *	n+8(FP)		byte count
 * result:
 *	R7 is reloaded from s1+0(FP), i.e. the original
 *	to-pointer, immediately before RETURN.
 * registers written:
 *	R7, R9-R13, R16, R17 and the condition codes.
 *	R0 is only used as a zero source and as a discarded
 *	destination for the compares.
 *
 * operand order, as used throughout this file:
 *	OP a,b, c	computes c = b OP a
 *	OP a, b		computes b = b OP a
 * so SUBCC a,b, R0 sets the condition codes from b-a.
 *
 * performance:
 * (tba)
 */

	MOVW	R7, s1+0(FP)		/* keep the to-pointer; reloaded into R7 at ret */
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R7, R10			/* R10 is to-pointer */
	SUBCC	R0,R9, R0		/* condition codes from count-0 */
	BGE	ok			/* count >= 0 (signed): carry on */
	MOVW	0(R0), R0		/* negative count: load through R0, i.e. address 0 -- presumably a deliberate fault; NOTE(review): confirm */

ok:
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */
	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * easiest test is copy backwards if
 * destination string has higher mem address
 */
	SUBCC	R11,R10, R0		/* condition codes from to - from */
	BGU	back			/* to > from (unsigned): copy backwards */

/*
 * if not at least 4 chars,
 * don't even mess around.
 * 3 chars to guarantee any
 * rounding up to a word
 * boundary and 4 characters
 * to get at least maybe one
 * full word store.
 */
	SUBCC	$4,R9, R0		/* condition codes from count-4 */
	BL	fout			/* count < 4: byte loop only */

/*
 * test if both pointers
 * are similarly word aligned
 */
	XOR	R10,R11, R7		/* R7 is scratch here; it is restored at ret */
	ANDCC	$3,R7, R0		/* low two bits differ? */
	BNE	fout			/* yes: pointers can never both be word aligned, use byte loop */

/*
 * byte at a time to word align
 */
f1:
	ANDCC	$3,R10, R0		/* to-pointer on a word boundary? */
	BE	f2			/* yes: from-pointer is too, by the test above */
	MOVB	0(R11), R16
	ADD	$1, R11
	MOVB	R16, 0(R10)
	ADD	$1, R10
	JMP	f1

/*
 * turn R9 into to-end pointer-15
 * copy 16 at a time while there's room.
 * R12 is smaller than R13 --
 * there are problems if R13 is 0.
 * (on this forward path the to-pointer is not above
 * the from-pointer, so R12 <= R13; that is why the
 * limit is derived from R12.)
 */
f2:
	SUB	$15,R12, R9		/* R9 = to-end - 15; the count in R9 is no longer needed */
f3:
	SUBCC	R10,R9, R0		/* condition codes from (to-end - 15) - to */
	BLEU	f4			/* fewer than 16 bytes left: go to the word loop */
	MOVW	0(R11), R16		/* four word loads and stores, alternating R16 and R17 */
	MOVW	4(R11), R17
	MOVW	R16, 0(R10)
	MOVW	8(R11), R16
	MOVW	R17, 4(R10)
	MOVW	12(R11), R17
	ADD	$16, R11
	MOVW	R16, 8(R10)
	MOVW	R17, 12(R10)
	ADD	$16, R10
	JMP	f3

/*
 * turn R9 into to-end pointer-3
 * copy 4 at a time while there's room
 */
f4:
	SUB	$3,R12, R9		/* R9 = to-end - 3 */
f5:
	SUBCC	R10,R9, R0		/* condition codes from (to-end - 3) - to */
	BLEU	fout			/* fewer than 4 bytes left: finish with bytes */
	MOVW	0(R11), R16
	ADD	$4, R11
	MOVW	R16, 0(R10)
	ADD	$4, R10
	JMP	f5

/*
 * last loop, copy byte at a time
 * until the from-pointer reaches the end from-pointer
 */
fout:
	SUBCC	R11,R13, R0		/* condition codes from from-end - from */
	BLEU	ret			/* nothing left to copy */
	MOVB	0(R11), R16
	ADD	$1, R11
	MOVB	R16, 0(R10)
	ADD	$1, R10
	JMP	fout

/*
 * whole thing repeated for backwards.
 * the end pointers R13 (from) and R12 (to) walk down;
 * R11, the start of the source, is the stopping point
 * and R10 is not used on this path.
 */
back:
/* fewer than 4 bytes: byte loop only */
	SUBCC	$4,R9, R0 
	BL	bout

	XOR	R12,R13, R7		/* same alignment test as above, on the end pointers */
	ANDCC	$3,R7, R0
	BNE	bout			/* alignments differ: byte loop only */
b1:
	ANDCC	$3,R13, R0		/* end from-pointer on a word boundary? */
	BE	b2
	MOVB	-1(R13), R16		/* no: move one byte down from the top */
	SUB	$1, R13
	MOVB	R16, -1(R12)
	SUB	$1, R12
	JMP	b1
b2:
	ADD	$15,R11, R9		/* R9 = from-pointer + 15 */
b3:
	SUBCC	R9,R13, R0		/* condition codes from from-end - (from + 15) */
	BLEU	b4			/* fewer than 16 bytes left: go to the word loop */
	MOVW	-4(R13), R16		/* four word loads and stores, highest address first */
	MOVW	-8(R13), R17
	MOVW	R16, -4(R12)
	MOVW	-12(R13), R16
	MOVW	R17, -8(R12)
	MOVW	-16(R13), R17
	SUB	$16, R13
	MOVW	R16, -12(R12)
	MOVW	R17, -16(R12)
	SUB	$16, R12
	JMP	b3
b4:
	ADD	$3,R11, R9		/* R9 = from-pointer + 3 */
b5:
	SUBCC	R9,R13, R0		/* condition codes from from-end - (from + 3) */
	BLEU	bout			/* fewer than 4 bytes left: finish with bytes */
	MOVW	-4(R13), R16
	SUB	$4, R13
	MOVW	R16, -4(R12)
	SUB	$4, R12
	JMP	b5

/*
 * backwards byte loop: runs until the end from-pointer
 * has come down to the from-pointer
 */
bout:
	SUBCC	R11,R13, R0		/* condition codes from from-end - from */
	BLEU	ret			/* nothing left to copy */
	MOVB	-1(R13), R16
	SUB	$1, R13
	MOVB	R16, -1(R12)
	SUB	$1, R12
	JMP	bout

/*
 * common exit for both directions
 */
ret:
	MOVW	s1+0(FP), R7		/* reload the original to-pointer saved on entry */
	RETURN