author		Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
committer	Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
commit		e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree		d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/ap/power/memcmp.s
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/power/memcmp.s')
-rwxr-xr-x	sys/src/ape/lib/ap/power/memcmp.s	110
1 file changed, 110 insertions, 0 deletions
diff --git a/sys/src/ape/lib/ap/power/memcmp.s b/sys/src/ape/lib/ap/power/memcmp.s
new file mode 100755
index 000000000..f524fa9d3
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/memcmp.s
@@ -0,0 +1,110 @@
+	TEXT	memcmp(SB), $0
+#define	BDNZ	BC	16,0,
+	MOVW	R3, s1+0(FP)	/* R3 is pointer1 */
+
+/*
+ * performance:
+ *	67mb/sec aligned; 16mb/sec unaligned
+ */
+
+	MOVW	n+8(FP), R4	/* R4 is count */
+	MOVW	s2+4(FP), R5	/* R5 is pointer2 */
+
+/*
+ * let LSW do the work for 4 characters or less; aligned and unaligned
+ */
+	CMP	R4, $0
+	BLE	eq
+	CMP	R4, $4
+	BLE	out
+
+	XOR	R3, R5, R9
+	ANDCC	$3, R9
+	BNE	l4	/* pointers misaligned; use LSW loop */
+
+/*
+ * do enough bytes to align pointers
+ */
+	ANDCC	$3,R3, R9
+	BEQ	l2
+	SUBC	R9, $4, R9
+	MOVW	R9, XER
+	LSW	(R3), R10
+	ADD	R9, R3
+	LSW	(R5), R14
+	ADD	R9, R5
+	SUB	R9, R4
+	CMPU	R10, R14
+	BNE	ne
+
+/*
+ * compare 16 at a time
+ */
+l2:
+	SRAWCC	$4, R4, R9
+	BLE	l4
+	MOVW	R9, CTR
+	SUB	$4, R3
+	SUB	$4, R5
+l3:
+	MOVWU	4(R3), R10
+	MOVWU	4(R5), R12
+	MOVWU	4(R3), R11
+	MOVWU	4(R5), R13
+	CMPU	R10, R12
+	BNE	ne
+	MOVWU	4(R3), R10
+	MOVWU	4(R5), R12
+	CMPU	R11, R13
+	BNE	ne
+	MOVWU	4(R3), R11
+	MOVWU	4(R5), R13
+	CMPU	R10, R12
+	BNE	ne
+	CMPU	R11, R13
+	BNE	ne
+	BDNZ	l3
+	ADD	$4, R3
+	ADD	$4, R5
+	RLWNMCC	$0, R4, $15, R4	/* residue */
+	BEQ	eq
+
+/*
+ * do remaining words with LSW; also does unaligned case
+ */
+l4:
+	SRAWCC	$2, R4, R9
+	BLE	out
+	MOVW	R9, CTR
+l5:
+	LSW	(R3), $4, R10
+	ADD	$4, R3
+	LSW	(R5), $4, R11
+	ADD	$4, R5
+	CMPU	R10, R11
+	BNE	ne
+	BDNZ	l5
+	RLWNMCC	$0, R4, $3, R4	/* residue */
+	BEQ	eq
+
+/*
+ * do remaining bytes with final LSW
+ */
+out:
+	MOVW	R4, XER
+	LSW	(R3), R10
+	LSW	(R5), R11
+	CMPU	R10, R11
+	BNE	ne
+
+eq:
+	MOVW	$0, R3
+	RETURN
+
+ne:
+	MOVW	$1, R3
+	BGE	ret
+	MOVW	$-1,R3
+ret:
+	RETURN
+	END
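The imported routine's strategy translates naturally to C: if both pointers share the same word alignment, step byte-by-byte up to a word boundary, compare a 32-bit word per iteration, then finish the residue byte-by-byte; if they don't, fall back to a byte loop. The following is a minimal sketch of that strategy, not the imported source: memcmp_sketch is an illustrative name, and the sketch replaces the assembly's LSW string loads (used for the unaligned and residue cases) and its 16-bytes-per-iteration unrolled loop with a single word per iteration and a plain byte loop for clarity.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative sketch of the memcmp strategy above; not the imported code.
 */
static int
memcmp_sketch(const void *s1, const void *s2, size_t n)
{
	const unsigned char *p = s1, *q = s2;

	/* word-at-a-time only if both pointers share the same alignment */
	if ((((uintptr_t)p ^ (uintptr_t)q) & 3) == 0) {
		/* do enough bytes to align the pointers */
		while (n > 0 && ((uintptr_t)p & 3) != 0) {
			if (*p != *q)
				return *p < *q ? -1 : 1;
			p++, q++, n--;
		}
		/* compare a 32-bit word per iteration */
		while (n >= 4) {
			uint32_t a, b;
			memcpy(&a, p, 4);
			memcpy(&b, q, 4);
			if (a != b)
				break;	/* byte loop below finds the difference */
			p += 4, q += 4, n -= 4;
		}
	}
	/* residue, unaligned case, or locating the differing byte */
	while (n > 0) {
		if (*p != *q)
			return *p < *q ? -1 : 1;
		p++, q++, n--;
	}
	return 0;
}

int
main(void)
{
	printf("%d\n", memcmp_sketch("abcd", "abce", 4));	/* prints -1 */
	return 0;
}

One difference worth noting: the sketch breaks out of the word loop on a mismatch and lets the byte loop locate the differing byte, which keeps the result endian-independent, whereas the assembly's CMPU on whole words yields memcmp ordering because this Power port runs big-endian.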