diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-02-27 18:29:08 +0100 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-02-27 18:29:08 +0100 |
commit | 967b1248f82e5f64eab4dbf45898a37851b71fa2 (patch) | |
tree | cec343c29c88f4f437319abf586dbb17cef019ee /sys/src/libip | |
parent | 88ccea37f65b87312292b2b1a107c39927c6a2c3 (diff) |
libip: move optimized 386 assembly version of ptclbsum() from kernel to libip
Diffstat (limited to 'sys/src/libip')
-rw-r--r-- | sys/src/libip/mkfile | 3 | ||||
-rw-r--r-- | sys/src/libip/ptclbsum386.s | 126 |
2 files changed, 129 insertions, 0 deletions
diff --git a/sys/src/libip/mkfile b/sys/src/libip/mkfile
index 5a63a01a7..47c25c4ea 100644
--- a/sys/src/libip/mkfile
+++ b/sys/src/libip/mkfile
@@ -24,3 +24,6 @@ UPDATE=\
 	${LIB:/$objtype/%=/386/%}\
 
 </sys/src/cmd/mksyslib
+
+ptclbsum.8: ptclbsum386.s
+	$AS -o $target ptclbsum386.s
diff --git a/sys/src/libip/ptclbsum386.s b/sys/src/libip/ptclbsum386.s
new file mode 100644
index 000000000..ba0a6a4d1
--- /dev/null
+++ b/sys/src/libip/ptclbsum386.s
@@ -0,0 +1,126 @@
+TEXT ptclbsum(SB), $0			/* ptclbsum(addr, len): end-around-carry sum of len bytes at addr, folded to 16 bits and left in AX (not complemented); presumably the protocol checksum sum - confirm against callers */
+	MOVL	addr+0(FP), SI		/* SI = addr, the running data pointer */
+	MOVL	len+4(FP), CX		/* CX = len, bytes still to be summed */
+
+	XORL	AX, AX			/* AX = running sum, starts at 0 */
+
+	TESTL	$1, SI			/* is the start address odd? */
+	MOVL	SI, DI			/* DI = original addr; only the commented-out test in _swab refers to it */
+	JEQ	_2align			/* even address: no leading byte to peel off */
+
+	DECL	CX			/* consume the leading odd byte */
+	JLT	_return			/* len was 0: return with AX still 0 */
+
+	MOVB	0x00(SI), AH		/* leading byte goes into the high byte of the sum */
+	INCL	SI			/* SI is now 2-byte aligned */
+
+_2align:
+	TESTL	$2, SI			/* is bit 1 of the address set, i.e. not yet 4-byte aligned? */
+	JEQ	_32loop
+
+	CMPL	CX, $2			/* less than 2 bytes? */
+	JLT	_1dreg			/* at most one byte remains */
+	SUBL	$2, CX			/* consume one 16-bit word to reach 4-byte alignment */
+
+	XORL	BX, BX			/* clear BX so the 16-bit load below is zero-extended */
+	MOVW	0x00(SI), BX
+	ADDL	BX, AX
+	ADCL	$0, AX			/* add the carry back into the sum */
+	LEAL	2(SI), SI		/* advance the pointer by 2 */
+
+_32loop:
+	CMPL	CX, $0x20		/* fewer than 32 bytes left? */
+	JLT	_8loop
+
+	MOVL	CX, BP
+	SHRL	$5, BP			/* BP = number of 32-byte iterations */
+	ANDL	$0x1F, CX		/* CX = bytes left after this loop; ANDL also leaves carry clear for the first ADCL */
+
+_32loopx:
+	MOVL	0x00(SI), BX		/* eight 32-bit loads per iteration, alternating BX and DX */
+	MOVL	0x1C(SI), DX
+	ADCL	BX, AX			/* each ADCL adds the carry produced by the previous add */
+	MOVL	0x04(SI), BX
+	ADCL	DX, AX
+	MOVL	0x10(SI), DX
+	ADCL	BX, AX
+	MOVL	0x08(SI), BX
+	ADCL	DX, AX
+	MOVL	0x14(SI), DX
+	ADCL	BX, AX
+	MOVL	0x0C(SI), BX
+	ADCL	DX, AX
+	MOVL	0x18(SI), DX
+	ADCL	BX, AX
+	LEAL	0x20(SI), SI		/* advance by 32 with LEAL, which does not touch the flags */
+	ADCL	DX, AX
+
+	DECL	BP			/* DECL leaves carry unchanged, so the chain continues into the next iteration */
+	JNE	_32loopx
+
+	ADCL	$0, AX			/* add in the final carry of the chain */
+
+_8loop:
+	CMPL	CX, $0x08		/* fewer than 8 bytes left? */
+	JLT	_2loop
+
+	MOVL	CX, BP
+	SHRL	$3, BP			/* BP = number of 8-byte iterations */
+	ANDL	$0x07, CX		/* CX = bytes left after this loop; carry is clear again */
+
+_8loopx:
+	MOVL	0x00(SI), BX		/* two 32-bit loads per iteration */
+	ADCL	BX, AX
+	MOVL	0x04(SI), DX
+	ADCL	DX, AX
+
+	LEAL	0x08(SI), SI		/* advance by 8 without touching the flags */
+	DECL	BP			/* carry is preserved across DECL */
+	JNE	_8loopx
+
+	ADCL	$0, AX			/* add in the final carry of the chain */
+
+_2loop:
+	CMPL	CX, $0x02		/* fewer than 2 bytes left? */
+	JLT	_1dreg
+
+	MOVL	CX, BP
+	SHRL	$1, BP			/* BP = number of 2-byte iterations */
+	ANDL	$0x01, CX		/* CX = 1 if a trailing byte remains, else 0; carry is clear again */
+
+_2loopx:
+	MOVWLZX	0x00(SI), BX		/* zero-extended 16-bit load */
+	ADCL	BX, AX
+
+	LEAL	0x02(SI), SI		/* advance by 2 without touching the flags */
+	DECL	BP			/* carry is preserved across DECL */
+	JNE	_2loopx
+
+	ADCL	$0, AX			/* add in the final carry of the chain */
+
+_1dreg:
+	TESTL	$1, CX			/* 1 byte left? */
+	JEQ	_fold
+
+	XORL	BX, BX			/* clear BX so the byte load below is zero-extended */
+	MOVB	0x00(SI), BX		/* trailing byte is added as the low byte */
+	ADDL	BX, AX
+	ADCL	$0, AX			/* add the carry back into the sum */
+
+_fold:
+	MOVL	AX, BX
+	SHRL	$16, BX			/* BX = upper 16 bits of the sum */
+	JEQ	_swab			/* upper half is zero: the sum fits in 16 bits */
+
+	ANDL	$0xFFFF, AX		/* keep the lower half ... */
+	ADDL	BX, AX			/* ... and add the upper half into it */
+	JMP	_fold			/* repeat until nothing remains in the upper half */
+
+_swab:
+	TESTL	$1, addr+0(FP)		/* was the original address odd? (re-read from the argument) */
+	/*TESTL	$1, DI*/
+	JNE	_return			/* odd start: the leading byte was placed in AH, so the result is returned unswapped */
+	XCHGB	AH, AL			/* even start: swap the two bytes of the 16-bit result */
+
+_return:
+	RET |