1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
/*
* BDNZ target: spelled as a raw BC with BO=16, BI=0 (decrement CTR, then
* branch if CTR != 0).  The trailing comma is deliberate: the branch
* target written after the macro becomes the third BC operand.
*/
#define BDNZ BC 16,0,
/*
* 64/64 unsigned division adapted from powerpc compiler writer's handbook
* (restoring shift-and-subtract division, one quotient bit per iteration).
*
* (R3:R4) = (R3:R4) / (R5:R6)	(64b) = (64b / 64b)
*   quo       dvd       dvs
*
* Remainder is left in R7:R8
*
* Arguments, as read from the frame by this routine:
*	a+0(FP),  a+4(FP)	dividend, msw then lsw
*	b+8(FP),  b+12(FP)	divisor,  msw then lsw
*	qp+16(FP)		address that receives the quotient
*				(msw at offset 0, lsw at offset 4); not
*				written when the address is 0
*	rp+20(FP)		address that receives the remainder
*				(same layout); not written when it is 0
* Presumably called from C as something like
*	_divu64(uvlong a, uvlong b, uvlong *qp, uvlong *rp)
* NOTE(review): the prototype is not visible here, and all four arguments
* (including a+0(FP)) are loaded from the frame -- confirm this matches
* how the caller passes its first argument.
*
* NOTE(review): there is no explicit test for dvs == 0 in this routine;
* presumably callers guarantee a non-zero divisor -- confirm.
*
* Code comment notation:
* msw = most-significant (high-order) word, i.e. bits 0..31
* lsw = least-significant (low-order) word, i.e. bits 32..63
* LZ = Leading Zeroes
* SD = Significant Digits
*
* R3:R4 = dvd (input dividend); quo (output quotient)
* R5:R6 = dvs (input divisor)
*
* R7:R8 = tmp; rem (output remainder)
*
* R9, R10, R11 are used as scratch, and CTR holds the loop count.
* The carry bit and the condition codes are modified as well.
*/
TEXT _divu64(SB), $0
/* load the operands: dvd -> R3:R4, dvs -> R5:R6 */
MOVW a+0(FP), R3
MOVW a+4(FP), R4
MOVW b+8(FP), R5
MOVW b+12(FP), R6
/* count the number of leading 0s in the dividend */
CMP R3, $0 /* dvd.msw == 0? (result consumed by the BNE below) */
CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
ADD $32, R9, R11 /* else dvd.LZ = dvd.lsw.LZ + 32 */
lab1:
/* count the number of leading 0s in the divisor */
CMP R5, $0 /* dvs.msw == 0? (result consumed by the BNE below) */
CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
ADD $32, R10, R9 /* else dvs.LZ = dvs.lsw.LZ + 32 */
lab2:
/* determine shift amounts to minimize the number of iterations */
CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
SUBC R11, $64, R10 /* R10 = 64 - dvd.LZ = dvd.SD */
BGT lab9 /* if(dvd.LZ > dvs.LZ) then dvs > dvd: quotient = 0 */
ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
SUBC R9, $64, R9 /* R9 = 64 - (dvs.LZ + 1) = dvs.SD - 1 */
ADD R9, R11 /* R11 = dvd.LZ + (dvs.SD - 1) = left shift of dvd for */
/* initial dvd */
SUB R9, R10, R9 /* R9 = dvd.SD - (dvs.SD - 1) = right shift of dvd for */
/* initial tmp; note R9 + R11 == 64 */
MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD + 1 */
/* R7:R8 = R3:R4 >> R9 */
CMP R9, $32
ADD $-32, R9, R7 /* R7 = R9 - 32, only used when R9 >= 32 */
BLT lab3 /* if(R9 < 32) jump to lab3 */
SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
MOVW $0, R7 /* tmp.msw = 0 */
BR lab4
lab3:
SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
SUBC R9, $32, R7 /* R7 = 32 - R9 */
SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */
OR R7, R8 /* tmp.lsw = R8 | R7 */
SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
lab4:
/* R3:R4 = R3:R4 << R11 */
CMP R11,$32
ADDC $-32, R11, R9 /* R9 = R11 - 32, only used when R11 >= 32 */
BLT lab5 /* (R11 < 32)? */
SLW R9, R4, R3 /* dvd.msw = dvd.lsw << (R11 - 32) */
MOVW $0, R4 /* dvd.lsw = 0 */
BR lab6
lab5:
SLW R11, R3 /* R3 = dvd.msw << R11 */
SUBC R11, $32, R9 /* R9 = 32 - R11 */
SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */
OR R9, R3 /* dvd.msw = R3 | R9 */
SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
lab6:
/* restoring division shift and subtract loop */
MOVW $-1, R10 /* R10 = -1, so that R10 + 1 below produces a carry */
ADDC $0, R7 /* clear carry bit before loop starts */
lab7:
/* tmp:dvd is considered one large register */
/* each portion is shifted left 1 bit by adding it to itself */
/* adde sums the carry from the previous and creates a new carry */
/* the carry entering dvd.lsw is the quotient bit of the previous pass, */
/* so the quotient accumulates in R3:R4 as the dividend shifts out */
ADDE R4,R4 /* shift dvd.lsw left 1 bit */
ADDE R3,R3 /* shift dvd.msw to left 1 bit */
ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
ADDE R7,R7 /* shift tmp.msw to left 1 bit */
SUBC R6, R8, R11 /* R11 = tmp.lsw - dvs.lsw */
SUBECC R5, R7, R9 /* R9 = tmp.msw - dvs.msw (with borrow); sets condition */
BLT lab8 /* if(result < 0) keep tmp; carry bit is left clear */
MOVW R11, R8 /* move lsw: tmp = tmp - dvs */
MOVW R9, R7 /* move msw */
ADDC $1, R10, R11 /* set carry bit (-1 + 1 carries out; R11 is scratch) */
lab8:
BDNZ lab7 /* decrement CTR, loop while CTR != 0 */
/* shift in the quotient bit produced by the final iteration */
ADDE R4,R4 /* quo.lsw (lsb = CA) */
ADDE R3,R3 /* quo.msw (lsb from lsw) */
lab10:
/* store results through the caller's pointers, skipping nil ones */
MOVW qp+16(FP), R9
MOVW rp+20(FP), R10
CMP R9, $0
BEQ lab11 /* qp == 0: quotient not wanted */
MOVW R3, 0(R9) /* *qp = quo, msw first */
MOVW R4, 4(R9)
lab11:
CMP R10, $0
BEQ lab12 /* rp == 0: remainder not wanted */
MOVW R7, 0(R10) /* *rp = rem, msw first */
MOVW R8, 4(R10)
lab12:
RETURN
lab9:
/* Quotient is 0 (dvs > dvd) */
MOVW R4, R8 /* rmd.lsw = dvd.lsw */
MOVW R3, R7 /* rmd.msw = dvd.msw */
MOVW $0, R4 /* dvd.lsw = 0 */
MOVW $0, R3 /* dvd.msw = 0 */
BR lab10 /* share the store/return code */
|