path: root/sys/src/ape/lib/ap/power/memcmp.s
author    Taru Karttunen <taruti@taruti.net>  2011-03-30 15:46:40 +0300
committer Taru Karttunen <taruti@taruti.net>  2011-03-30 15:46:40 +0300
commit    e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree      d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/ap/power/memcmp.s

Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/power/memcmp.s')
-rwxr-xr-x  sys/src/ape/lib/ap/power/memcmp.s  110
1 file changed, 110 insertions, 0 deletions
diff --git a/sys/src/ape/lib/ap/power/memcmp.s b/sys/src/ape/lib/ap/power/memcmp.s
new file mode 100755
index 000000000..f524fa9d3
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/memcmp.s
@@ -0,0 +1,110 @@
+ TEXT memcmp(SB), $0
+#define BDNZ BC 16,0,
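+/* BDNZ: decrement CTR and branch if it is non-zero (BO=16, BI=0) */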
+ MOVW R3, s1+0(FP) /* R3 is pointer1 */
+
+/*
+ * performance:
+ * 67mb/sec aligned; 16mb/sec unaligned
+ */
+
+ MOVW n+8(FP), R4 /* R4 is count */
+ MOVW s2+4(FP), R5 /* R5 is pointer2 */
+
+/*
+ * let LSW do the work for 4 characters or less; aligned and unaligned
+ */
+ CMP R4, $0
+ BLE eq /* n <= 0: nothing to compare */
+ CMP R4, $4
+ BLE out /* n <= 4: one LSW compare handles it */
+
+ XOR R3, R5, R9 /* do the two pointers share word alignment? */
+ ANDCC $3, R9
+ BNE l4 /* pointers misaligned; use LSW loop */
+
+/*
+ * do enough bytes to align pointers
+ */
+ ANDCC $3,R3, R9 /* R9 = R3 & 3: current misalignment */
+ BEQ l2 /* already word aligned */
+ SUBC R9, $4, R9 /* R9 = 4 - R9: bytes needed to reach alignment */
+ MOVW R9, XER /* LSW takes its byte count from XER */
+ LSW (R3), R10 /* load those bytes from both pointers */
+ ADD R9, R3
+ LSW (R5), R14
+ ADD R9, R5
+ SUB R9, R4 /* and count them off n */
+ CMPU R10, R14
+ BNE ne
+
+/*
+ * compare 16 at a time
+ */
+l2:
+ SRAWCC $4, R4, R9 /* R9 = n/16: 16-byte chunks */
+ BLE l4
+ MOVW R9, CTR /* loop count for BDNZ */
+ SUB $4, R3 /* bias pointers for MOVWU pre-increment */
+ SUB $4, R5
+l3:
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ CMPU R11, R13
+ BNE ne
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ CMPU R11, R13
+ BNE ne
+ BDNZ l3
+ ADD $4, R3 /* undo the MOVWU bias */
+ ADD $4, R5
+ RLWNMCC $0, R4, $15, R4 /* residue: n & 15 */
+ BEQ eq
+
+/*
+ * do remaining words with LSW; also does unaligned case
+ */
+l4:
+ SRAWCC $2, R4, R9 /* R9 = n/4: whole words left */
+ BLE out
+ MOVW R9, CTR
+l5:
+ LSW (R3), $4, R10 /* 4 bytes at a time, any alignment */
+ ADD $4, R3
+ LSW (R5), $4, R11
+ ADD $4, R5
+ CMPU R10, R11
+ BNE ne
+ BDNZ l5
+ RLWNMCC $0, R4, $3, R4 /* residue: n & 3 */
+ BEQ eq
+
+/*
+ * do remaining bytes with final LSW
+ */
+out:
+ MOVW R4, XER /* 1 to 4 bytes remain; count for the final LSW */
+ LSW (R3), R10
+ LSW (R5), R11
+ CMPU R10, R11
+ BNE ne
+
+eq:
+ MOVW $0, R3
+ RETURN
+
+ne:
+ MOVW $1, R3
+ BGE ret
+ MOVW $-1,R3
+ret:
+ RETURN
+ END
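
For readers who do not work in Plan 9 PowerPC assembler, the following is a rough C sketch of the strategy the routine above uses: operands that are short or differently aligned go through a simple catch-all path (the assembly uses LSW string loads there, the sketch uses a byte loop), equally aligned operands are first advanced to a word boundary, and the bulk of the data is then compared four words (16 bytes) per iteration, with leftovers handled at the end. The names memcmp_sketch and bytecmp are illustrative only, and the portable byte-order handling below stands in for the assembly's reliance on big-endian PowerPC, where an unsigned word compare (CMPU) orders bytes the same way memcmp must.

#include <stdint.h>
#include <string.h>

/* compare n bytes one at a time; also used for tails and the catch-all path */
static int
bytecmp(const unsigned char *a, const unsigned char *b, size_t n)
{
	for (; n > 0; n--, a++, b++)
		if (*a != *b)
			return *a < *b ? -1 : 1;
	return 0;
}

int
memcmp_sketch(const void *s1, const void *s2, size_t n)
{
	const unsigned char *p1 = s1, *p2 = s2;

	/* short counts or pointers with different low bits: catch-all path
	 * (the assembly handles these with LSW string loads) */
	if (n <= 4 || (((uintptr_t)p1 ^ (uintptr_t)p2) & 3) != 0)
		return bytecmp(p1, p2, n);

	/* both pointers share alignment: step them up to a word boundary */
	if (((uintptr_t)p1 & 3) != 0) {
		size_t k = 4 - ((uintptr_t)p1 & 3);
		int r = bytecmp(p1, p2, k);
		if (r != 0)
			return r;
		p1 += k;
		p2 += k;
		n -= k;
	}

	/* main loop: four words (16 bytes) per iteration, like the loop at l3 */
	while (n >= 16) {
		uint32_t a[4], b[4];
		memcpy(a, p1, sizeof a);	/* aligned word loads in the assembly */
		memcpy(b, p2, sizeof b);
		for (int i = 0; i < 4; i++)
			if (a[i] != b[i])
				return bytecmp(p1 + 4*i, p2 + 4*i, 4);
		p1 += 16;
		p2 += 16;
		n -= 16;
	}

	/* remaining words and bytes (labels l4/l5 and out in the assembly) */
	return bytecmp(p1, p2, n);
}

The sketch folds the assembly's word-at-a-time residue loop (l5) and the final LSW tail into bytecmp for brevity; the structure, not the instruction selection, is the point.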