summaryrefslogtreecommitdiff
path: root/sys/src/9/bcm/cache.v7.s
blob: 3b5869ed7399ea15f919a8aaf732774b2f950be1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/*
 * cortex arm arch v7 cache flushing and invalidation
 * shared by l.s and rebootcode.s
 */

#define	BPIALL	MCR CpSC, 0, R0, C(CpCACHE), C(5), 6	/* branch predictor invalidate all */

TEXT cacheiinv(SB), $-4				/* I invalidate */
	DSB
	MOVW	$0, R0
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */
	BPIALL	/* redundant? */
	DSB
	ISB
	RET

TEXT cacheiinvse(SB), $0			/* I invalidate SE */
	MOVW 4(FP), R1
	ADD	R0, R1
	BIC $(ICACHELINESZ - 1), R0
	DSB
_iinvse:
	MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEse
	ADD $ICACHELINESZ, R0
	CMP.S R0, R1
	BGT _iinvse
	BPIALL
	DSB
	ISB
	RET

/*
 * set/way operators, passed a suitable set/way value in R0.
 */
TEXT cachedwb_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi
	RET

TEXT cachedwbinv_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi
	RET

TEXT cachedinv_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi
	RET

	/* set cache size select */
TEXT setcachelvl(SB), $-4
	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0
	ISB
	RET

	/* return cache sizes */
TEXT getwayssets(SB), $-4
	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0
	RET

/*
 * l1 cache operations.
 * l1 and l2 ops are intended to be called from C, thus need save no
 * caller's regs, only those we need to preserve across calls.
 */

TEXT cachedwb(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwb_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cachedwbinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwbinv_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cachedinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedinv_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cacheuwbinv(SB), $-4
	MOVM.DB.W [R14], (R13)	/* save lr on stack */
	MOVW	CPSR, R1
	CPSID			/* splhi */

	MOVM.DB.W [R1], (R13)	/* save R1 on stack */

	BL	cachedwbinv(SB)
	BL	cacheiinv(SB)

	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
	MOVW	R1, CPSR
	MOVM.IA.W (R13), [R14]	/* restore lr */
	RET

/*
 * l2 cache operations
 */

TEXT l2cacheuwb(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwb_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT l2cacheuwbinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	CPSR, R1
	CPSID			/* splhi */

	MOVM.DB.W [R1], (R13)	/* save R1 on stack */

	MOVW	$cachedwbinv_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	BL	l2cacheuinv(SB)

	MOVM.IA.W (R13), [R1]	/* restore R1 (saved CPSR) */
	MOVW	R1, CPSR
	MOVW.P	8(R13), R15

TEXT l2cacheuinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedinv_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

/*
 * callers are assumed to be the above l1 and l2 ops.
 * R0 is the function to call in the innermost loop.
 * R8 is the cache level (one-origin: 1 or 2).
 *
 * initial translation by 5c, then massaged by hand.
 */
TEXT wholecache+0(SB), $-4
	MOVW	R0, R1		/* save argument for inner loop in R1 */
	SUB	$1, R8		/* convert cache level to zero origin */

	/* we may not have the MMU on yet, so map R1 to PC's space */
	BIC	$KSEGM,	R1	/* strip segment from address */
	MOVW	PC, R2		/* get PC's segment ... */
	AND	$KSEGM, R2
	ORR	R2, R1		/* combine them */

	/* drain write buffers */
	BARRIERS
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
	ISB

	MOVW	CPSR, R2
	MOVM.DB.W [R2,R14], (SP) /* save regs on stack */
	CPSID			/* splhi to make entire op atomic */

	/* get cache sizes */
	SLL	$1, R8, R0	/* R0 = (cache - 1) << 1 */
	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache size select */
	ISB
	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */

	/* compute # of ways and sets for this cache level */
	SRA	$3, R0, R5	/* R5 (ways) = R0 >> 3 */
	AND	$1023, R5	/* R5 = (R0 >> 3) & MASK(10) */
	ADD	$1, R5		/* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */

	SRA	$13, R0, R2	/* R2 = R0 >> 13 */
	AND	$32767, R2	/* R2 = (R0 >> 13) & MASK(15) */
	ADD	$1, R2		/* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */

	/* precompute set/way shifts for inner loop */
	MOVW	$6, R4
	CMP	$0, R8		/* cache == 1? */
	MOVW.EQ	$30, R3 	/* l1 */
	MOVW.NE	$29, R3		/* l2 */
	CMP	$16, R5		/* armv8 has 16-way l2, adjust shift */
	MOVW.EQ	$28, R3

	/* iterate over ways */
	MOVW	$0, R7		/* R7: way */
outer:
	/* iterate over sets */
	MOVW	$0, R6		/* R6: set */
inner:
	/* compute set/way register contents */
	SLL	R3, R7, R0 	/* R0 = way << R3 (L?WAYSH) */
	ORR	R8<<1, R0	/* R0 = way << L?WAYSH | (cache - 1) << 1 */
	ORR	R6<<R4, R0 	/* R0 = way<<L?WAYSH | (cache-1)<<1 |set<<R4 */

	BL	(R1)		/* call set/way operation with R0 */

	ADD	$1, R6		/* set++ */
	CMP	R2, R6		/* set >= sets? */
	BLT	inner		/* no, do next set */

	ADD	$1, R7		/* way++ */
	CMP	R5, R7		/* way >= ways? */
	BLT	outer		/* no, do next way */

	MOVM.IA.W (SP), [R2,R14] /* restore regs */
	MOVW	R2, CPSR	/* splx */

	/* drain write buffers */
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
	ISB
	RET