/* /sys/src/libc/alpha/memmove.s */

#define QUAD	8
#define ALIGN	64
#define BLOCK	64
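
/*
 * memmove/memcpy for the Alpha.
 *
 * If the source lies at or above the destination the copy runs
 * forward, otherwise backward, so overlapping regions are safe in
 * either direction.  Each direction first copies bytes until the
 * destination is ALIGN-aligned, then moves BLOCK bytes per
 * iteration: by plain quadword loads and stores when the source is
 * quad-aligned, or by EXTQL/EXTQH merging when it is not.  Quadword
 * and byte tails drain the remainder.
 *
 * A rough C sketch of the forward path, assuming a quad-aligned
 * source (illustrative only; the real loop below is unrolled and
 * the unaligned case is handled separately):
 *
 *	uchar *t, *f, *e;
 *	int i;
 *
 *	t = to;
 *	f = from;
 *	e = t + n;
 *	while(t < e && ((uintptr)t & (ALIGN-1)))
 *		*t++ = *f++;
 *	while(e - t >= BLOCK){
 *		for(i = 0; i < BLOCK/QUAD; i++)
 *			((uvlong*)t)[i] = ((uvlong*)f)[i];
 *		t += BLOCK;
 *		f += BLOCK;
 *	}
 *	while(t < e)
 *		*t++ = *f++;
 */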

TEXT memmove(SB), $0
_memmove:
	MOVL	from+4(FP), R7		/* R7: source pointer */
	MOVL	n+8(FP), R10		/* R10: byte count */
	MOVQ	R0, R6			/* to arrives in R0 (also the return register); R6 walks the destination */

	CMPUGE	R7, R0, R5		/* from >= to: forward copy is safe despite overlap */
	BNE	R5, _forward

	MOVQ	R6, R8			/* R8: to; the backward loops stop here */
	ADDL	R10, R6, R6		/* to+n */
	ADDL	R10, R7, R7		/* from+n */

	CMPUGE	$ALIGN, R10, R1		/* need at least ALIGN bytes */
	BNE	R1, _b1tail		/* short move: byte loop only */

_balign:				/* copy bytes down until to is ALIGN-aligned */
	AND	$(ALIGN-1), R6, R1
	BEQ	R1, _baligned

	MOVBU	-1(R7), R2
	ADDL	$-1, R6, R6
	MOVB	R2, (R6)
	ADDL	$-1, R7, R7
	JMP	_balign

_baligned:
	AND	$(QUAD-1), R7, R1	/* is the source quad-aligned */
	BNE	R1, _bunaligned
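
/*
 * Quad-aligned source: move BLOCK bytes per iteration, descending.
 * All eight quadwords are loaded before any is stored, so one
 * iteration stays correct even when the regions overlap by less
 * than BLOCK bytes.
 */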

	ADDL	$(BLOCK-1), R8, R9
_bblock:
	CMPUGE	R9, R6, R1		/* fewer than BLOCK bytes left? */
	BNE	R1, _b8tail

	MOVQ	-64(R7), R22
	MOVQ	-56(R7), R23
	MOVQ	-48(R7), R24
	MOVQ	-40(R7), R25
	MOVQ	-32(R7), R2
	MOVQ	-24(R7), R3
	MOVQ	-16(R7), R4
	MOVQ	-8(R7), R5

	SUBL	$64, R6, R6
	SUBL	$64, R7, R7

	MOVQ	R22, (R6)
	MOVQ	R23, 8(R6)
	MOVQ	R24, 16(R6)
	MOVQ	R25, 24(R6)
	MOVQ	R2, 32(R6)
	MOVQ	R3, 40(R6)
	MOVQ	R4, 48(R6)
	MOVQ	R5, 56(R6)
	JMP	_bblock

_b8tail:
	ADDL	$(QUAD-1), R8, R9
_b8block:
	CMPUGE	R9, R6, R1		/* fewer than QUAD bytes left? */
	BNE	R1, _b1tail

	MOVQ	-8(R7), R2
	SUBL	$8, R6
	MOVQ	R2, (R6)
	SUBL	$8, R7
	JMP	_b8block

_b1tail:
	CMPUGE	R8, R6, R1		/* reached to? */
	BNE	R1, _ret

	MOVBU	-1(R7), R2
	SUBL	$1, R6, R6
	MOVB	R2, (R6)
	SUBL	$1, R7, R7
	JMP	_b1tail
_ret:
	RET
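
/*
 * Backward copy with a quad-aligned destination but an unaligned
 * source: each iteration reads the three aligned quadwords covering
 * 16 source bytes, then uses EXTQL/EXTQH, which shift by the low
 * three bits of the address in R7, to assemble two aligned
 * quadwords from the unaligned byte stream before storing them.
 */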

_bunaligned:
	ADDL	$(16-1), R8, R9

_bu8block:
	CMPUGE	R9, R6, R1		/* fewer than 16 bytes left? */
	BNE	R1, _b1tail

	MOVQU	-16(R7), R4
	MOVQU	-8(R7), R3
	MOVQU	(R7), R2
	SUBL	$16, R6
	EXTQH	R7, R2, R2
	EXTQL	R7, R3, R5
	OR	R5, R2, R11
	EXTQH	R7, R3, R3
	EXTQL	R7, R4, R4
	OR	R3, R4, R13
	MOVQ	R11, 8(R6)
	MOVQ	R13, (R6)
	SUBL	$16, R7
	JMP	_bu8block

_forward:				/* source at or above destination: copy up */
	ADDL	R10, R6, R8		/* R8: to+n, end of destination */

	CMPUGE	$ALIGN, R10, R1		/* need at least ALIGN bytes */
	BNE	R1, _f1tail		/* short move: byte loop only */

_falign:				/* copy bytes up until to is ALIGN-aligned */
	AND	$(ALIGN-1), R6, R1
	BEQ	R1, _faligned

	MOVBU	(R7), R2
	ADDL	$1, R6, R6
	ADDL	$1, R7, R7
	MOVB	R2, -1(R6)
	JMP	_falign

_faligned:
	AND	$(QUAD-1), R7, R1	/* is the source quad-aligned */
	BNE	R1, _funaligned
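
/*
 * As in the backward loop, all eight quadword loads precede the
 * stores, so a BLOCK-sized iteration tolerates closely overlapping
 * regions.
 */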

	SUBL	$(BLOCK-1), R8, R9
_fblock:
	CMPUGT	R9, R6, R1		/* at least BLOCK bytes left? */
	BEQ	R1, _f8tail

	MOVQ	(R7), R2
	MOVQ	8(R7), R3
	MOVQ	16(R7), R4
	MOVQ	24(R7), R5
	MOVQ	32(R7), R22
	MOVQ	40(R7), R23
	MOVQ	48(R7), R24
	MOVQ	56(R7), R25

	ADDL	$64, R6, R6
	ADDL	$64, R7, R7

	MOVQ	R2, -64(R6)
	MOVQ	R3, -56(R6)
	MOVQ	R4, -48(R6)
	MOVQ	R5, -40(R6)
	MOVQ	R22, -32(R6)
	MOVQ	R23, -24(R6)
	MOVQ	R24, -16(R6)
	MOVQ	R25, -8(R6)
	JMP	_fblock

_f8tail:
	SUBL	$(QUAD-1), R8, R9
_f8block:
	CMPUGT	R9, R6, R1		/* at least QUAD bytes left? */
	BEQ	R1, _f1tail

	MOVQ	(R7), R2
	ADDL	$8, R6
	ADDL	$8, R7
	MOVQ	R2, -8(R6)
	JMP	_f8block

_f1tail:
	CMPUGT	R8, R6, R1		/* anything left? */
	BEQ	R1, _fret
	MOVBU	(R7), R2
	ADDL	$1, R6, R6
	ADDL	$1, R7, R7
	MOVB	R2, -1(R6)
	JMP	_f1tail

_fret:
	RET
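
/*
 * Forward copy with an unaligned source: MOVQU loads the aligned
 * quadword containing the current source address, EXTQL shifts the
 * wanted bytes down to the bottom of the register, EXTQH shifts the
 * leading bytes of the next quadword up to the top, and OR merges
 * the two into one aligned quadword for the store.
 */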

_funaligned:
	SUBL	$(16-1), R8, R9
_fu8block:
	CMPUGT	R9, R6, R1		/* at least 16 bytes left? */
	BEQ	R1, _f1tail

	MOVQU	(R7), R2
	MOVQU	8(R7), R3
	MOVQU	16(R7), R4
	EXTQL	R7, R2, R2
	EXTQH	R7, R3, R5
	OR	R5, R2, R11
	EXTQL	R7, R3, R3
	MOVQ	R11, (R6)
	EXTQH	R7, R4, R4
	OR	R3, R4, R11
	MOVQ	R11, 8(R6)
	ADDL	$16, R6
	ADDL	$16, R7
	JMP	_fu8block

TEXT	memcpy(SB), $0			/* memcpy shares the code; overlap is simply handled too */
	JMP	_memmove