/*
 * cortex arm arch v7 cache flushing and invalidation
 * shared by l.s and rebootcode.s
 */

#define BPIALL	MCR CpSC, 0, R0, C(CpCACHE), C(5), 6	/* branch predictor invalidate all */

TEXT cacheiinv(SB), $-4				/* I invalidate */
	DSB
	MOVW	$0, R0
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */
	BPIALL					/* redundant? */
	DSB
	ISB
	RET

TEXT cacheiinvse(SB), $0			/* I invalidate SE */
	MOVW	4(FP), R1
	ADD	R0, R1
	BIC	$(ICACHELINESZ - 1), R0
	DSB
_iinvse:
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEse
	ADD	$ICACHELINESZ, R0
	CMP.S	R0, R1
	BGT	_iinvse
	BPIALL
	DSB
	ISB
	RET
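
/*
 * Note (not in the original source): under the calling convention the code
 * above assumes -- first argument in R0, second at 4(FP) -- the C-side
 * declaration would be something like
 *
 *	void cacheiinvse(void *va, int nbytes);	// invalidate I-cache by VA range
 *
 * e.g. cacheiinvse(code, codelen) after copying code into place, once the
 * corresponding data cache lines have been written back.
 */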
/*
 * set/way operators, passed a suitable set/way value in R0.
 */
TEXT cachedwb_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi
	RET

TEXT cachedwbinv_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi
	RET

TEXT cachedinv_sw(SB), $-4
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi
	RET
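
/*
 * Note (not in the original source): each *_sw operation takes a set/way
 * operand in R0, composed the way the architectural set/way cache
 * maintenance registers expect:
 *
 *	way     << (32 - log2(associativity))	(upper bits)
 *	set     << log2(line length in bytes)
 *	level-1 << 1				(bits 3:1)
 *
 * wholecache below builds exactly this value (R3 is the way shift, R4 the
 * set shift, R8 the zero-origin level) before calling one of these helpers
 * for every set and way.
 */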
/* set cache size select */
TEXT setcachelvl(SB), $-4
	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0
	ISB
	RET

/* return cache sizes */
TEXT getwayssets(SB), $-4
	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0
	RET
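
/*
 * Note (illustrative sketch, not in the original source): setcachelvl writes
 * the cache size selection register and getwayssets reads back the cache
 * size id register for the selected level.  A C caller could decode the
 * geometry of cache level `lvl' (1 or 2) roughly as follows; the field
 * positions match the shifts and masks used in wholecache below.
 *
 *	ulong r, linelen, ways, sets;
 *
 *	setcachelvl((lvl - 1) << 1);		// select data/unified cache at lvl
 *	r = getwayssets();
 *	linelen = 4 << ((r & 7) + 2);		// line length in bytes
 *	ways = ((r >> 3) & 1023) + 1;		// associativity
 *	sets = ((r >> 13) & 32767) + 1;		// number of sets
 */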
/*
 * l1 cache operations.
 * l1 and l2 ops are intended to be called from C, thus need not save the
 * caller's regs, only those we need to preserve across calls.
 */
TEXT cachedwb(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwb_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cachedwbinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwbinv_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cachedinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedinv_sw(SB), R0
	MOVW	$1, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT cacheuwbinv(SB), $-4
	MOVM.DB.W [R14], (R13)		/* save lr on stack */
	MOVW	CPSR, R1
	CPSID				/* splhi */
	MOVM.DB.W [R1], (R13)		/* save R1 on stack */
	BL	cachedwbinv(SB)
	BL	cacheiinv(SB)
	MOVM.IA.W (R13), [R1]		/* restore R1 (saved CPSR) */
	MOVW	R1, CPSR
	MOVM.IA.W (R13), [R14]		/* restore lr */
	RET
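
/*
 * Note (not in the original source): cacheuwbinv runs with interrupts
 * disabled so nothing can dirty the D-cache between the write-back-
 * invalidate and the I-cache invalidate.  In C terms the body is roughly
 *
 *	s = splhi();
 *	cachedwbinv();
 *	cacheiinv();
 *	splx(s);
 *
 * except that the CPSR is saved and restored directly here.
 */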
/*
 * l2 cache operations
 */
TEXT l2cacheuwb(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedwb_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15

TEXT l2cacheuwbinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	CPSR, R1
	CPSID				/* splhi */
	MOVM.DB.W [R1], (R13)		/* save R1 on stack */
	MOVW	$cachedwbinv_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	BL	l2cacheuinv(SB)
	MOVM.IA.W (R13), [R1]		/* restore R1 (saved CPSR) */
	MOVW	R1, CPSR
	MOVW.P	8(R13), R15

TEXT l2cacheuinv(SB), $-4
	MOVW.W	R14, -8(R13)
	MOVW	$cachedinv_sw(SB), R0
	MOVW	$2, R8
	BL	wholecache(SB)
	MOVW.P	8(R13), R15
/*
 * callers are assumed to be the above l1 and l2 ops.
 * R0 is the function to call in the innermost loop.
 * R8 is the cache level (one-origin: 1 or 2).
 *
 * initial translation by 5c, then massaged by hand.
 */
TEXT wholecache+0(SB), $-4
	MOVW	R0, R1		/* save argument for inner loop in R1 */
	SUB	$1, R8		/* convert cache level to zero origin */

	/* we may not have the MMU on yet, so map R1 to PC's space */
	BIC	$KSEGM, R1	/* strip segment from address */
	MOVW	PC, R2		/* get PC's segment ... */
	AND	$KSEGM, R2
	ORR	R2, R1		/* combine them */

	/* drain write buffers */
	BARRIERS
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
	ISB

	MOVW	CPSR, R2
	MOVM.DB.W [R2,R14], (SP)	/* save regs on stack */
	CPSID				/* splhi to make entire op atomic */

	/* get cache sizes */
	SLL	$1, R8, R0	/* R0 = (cache - 1) << 1 */
	MCR	CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache size select */
	ISB
	MRC	CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */

	/* compute # of ways and sets for this cache level */
	SRA	$3, R0, R5	/* R5 (ways) = R0 >> 3 */
	AND	$1023, R5	/* R5 = (R0 >> 3) & MASK(10) */
	ADD	$1, R5		/* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */

	SRA	$13, R0, R2	/* R2 = R0 >> 13 */
	AND	$32767, R2	/* R2 = (R0 >> 13) & MASK(15) */
	ADD	$1, R2		/* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */

	/* precompute set/way shifts for inner loop */
	MOVW	$6, R4
	CMP	$0, R8		/* cache == 1? */
	MOVW.EQ	$30, R3		/* l1 */
	MOVW.NE	$29, R3		/* l2 */
	CMP	$16, R5		/* armv8 has 16-way l2, adjust shift */
	MOVW.EQ	$28, R3

	/* iterate over ways */
	MOVW	$0, R7		/* R7: way */
outer:
	/* iterate over sets */
	MOVW	$0, R6		/* R6: set */
inner:
	/* compute set/way register contents */
	SLL	R3, R7, R0	/* R0 = way << R3 (L?WAYSH) */
	ORR	R8<<1, R0	/* R0 = way << L?WAYSH | (cache - 1) << 1 */
	ORR	R6<<R4, R0	/* R0 = way<<L?WAYSH | (cache-1)<<1 | set<<R4 */
	BL	(R1)		/* call set/way operation with R0 */

	ADD	$1, R6		/* set++ */
	CMP	R2, R6		/* set >= sets? */
	BLT	inner		/* no, do next set */

	ADD	$1, R7		/* way++ */
	CMP	R5, R7		/* way >= ways? */
	BLT	outer		/* no, do next way */

	MOVM.IA.W (SP), [R2,R14]	/* restore regs */
	MOVW	R2, CPSR		/* splx */

	/* drain write buffers */
	MCR	CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait
	ISB

	RET
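
/*
 * Note (illustrative sketch, not in the original source): ignoring the
 * MMU-off address fixup and the interrupt masking, wholecache is roughly
 * the following C, where op is one of the *_sw helpers above:
 *
 *	void
 *	wholecache(void (*op)(ulong), int level)	// level is 1 or 2
 *	{
 *		ulong r, ways, sets, wayshift, setshift, way, set;
 *
 *		setcachelvl((level - 1) << 1);
 *		r = getwayssets();
 *		ways = ((r >> 3) & 1023) + 1;
 *		sets = ((r >> 13) & 32767) + 1;
 *		setshift = 6;				// 64-byte lines assumed
 *		wayshift = level == 1? 30: 29;		// 4-way l1, 8-way l2
 *		if(ways == 16)
 *			wayshift = 28;			// 16-way l2 (e.g. armv8)
 *		for(way = 0; way < ways; way++)
 *			for(set = 0; set < sets; set++)
 *				op(way<<wayshift | set<<setshift | (level-1)<<1);
 *	}
 */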