summaryrefslogtreecommitdiff
path: root/sys/src/libc/power/memset.s
blob: fa6e8d920828d43b5c26789f94c6d0ef52b2fbc9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
	TEXT	memset(SB),$0
/*
 * memset(p, c, n): store the low byte of c into n bytes starting at p
 * and return p.
 *
 * On entry the first argument (p) is in R3; c and n are read from the
 * frame at c+4(FP) and n+8(FP).
 *
 * Register use:
 *	R3	current store address
 *	R4	bytes still to store
 *	R5-R8	the fill byte replicated into all 16 bytes
 *	R9	scratch: alignment byte count, then 16-byte block count
 *	R10	scratch: second base address inside the 16-byte loop
 * XER and CTR are overwritten.
 */

/* BDNZ label: BC with BO=16, BI=0, i.e. decrement CTR and branch while CTR is non-zero */
#define	BDNZ	BC	16,0,
	MOVW R3, p+0(FP)		/* R3 is pointer; save it in the frame, it is reloaded at ret as the return value */

/*
 * performance:
 *	about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
 *	drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
 */

	MOVW	n+8(FP), R4		/* R4 is count */
	CMP	R4, $0
	/*
	 * nothing to store when count <= 0.
	 * NOTE(review): CMP/BLE is presumably a signed test, so a count with
	 * the top bit set would also store nothing -- confirm that is intended.
	 */
	BLE	ret
	MOVW	c+4(FP), R5		/* R5 is char */

/*
 * create 16 copies of c in R5 .. R8
 *	RLWNM $0:   rotate by 0 and mask with 0xff, keeping only the low byte
 *	RLWMI $8:   insert R5 rotated left 8 under mask 0xff00 (low halfword = cc)
 *	RLWMI $16:  insert R5 rotated left 16 under mask 0xffff0000 (word = cccc)
 * then copy the finished word into R6, R7 and R8
 */
	RLWNM	$0, R5, $0xff, R5
	RLWMI	$8, R5, $0xff00, R5
	RLWMI	$16, R5, $0xffff0000, R5
	MOVW	R5, R6
	MOVW	R5, R7
	MOVW	R5, R8

/*
 * let STSW do the work for 16 characters or less; aligned and unaligned
 * (the count in R4 is 1..16 on this path, which R5 .. R8 can cover)
 */
	CMP	R4, $16
	BLE	out

/*
 * store enough bytes to align pointer to an 8-byte boundary:
 * R9 = p & 7; when it is non-zero, store 8-R9 bytes with STSW, then
 * advance the pointer and reduce the count by the same amount.
 * The count is greater than 16 here, so it stays positive afterwards.
 */
	ANDCC	$7,R3, R9
	BEQ	l2			/* already aligned */
	SUBC	R9, $8, R9		/* R9 = 8 - R9, bytes up to the boundary */
	MOVW	R9, XER			/* byte count for the STSW that follows */
	STSW	R5, (R3)
	ADD	R9, R3
	SUB	R9, R4

/*
 * store 16 at a time while there's room
 * STSW was used here originally, but it's `completion serialised'
 *
 * R9 = count / 16 is the number of whole 16-byte blocks and becomes the
 * loop counter in CTR; when it is not positive (fewer than 16 bytes
 * left) go straight to the tail at out.
 * Each pass stores R5 .. R8 at offsets 0, 4, 8 and 12, using R3 and
 * R10 (= R3+8) alternately as the base, and leaves R3 advanced by 16.
 */
l2:
	SRAWCC	$4, R4, R9
	BLE	out
	MOVW	R9, CTR
l3:
	MOVW	R5, 0(R3)
	ADD	$8, R3, R10
	MOVW	R6, 4(R3)
	MOVW	R7, 0(R10)
	ADD	$8, R10, R3
	MOVW	R8, 4(R10)
	BDNZ	l3
	RLWNMCC	$0, R4, $15, R4	/* residue: R4 = count & 15, condition codes set */
	BEQ	ret			/* count was a multiple of 16: done */

/*
 * store up to 16 bytes from R5 .. R8; aligned and unaligned
 * (R4, the remaining byte count, is loaded into XER for STSW)
 */

out:
	MOVW	R4, XER
	STSW	R5, (R3)

/*
 * return the original pointer, reloaded from the frame slot written at entry
 */
ret:
	MOVW	0(FP), R3
	RETURN
	END