summaryrefslogtreecommitdiff
path: root/sys/man
diff options
context:
space:
mode:
authorJacob Moody <moody@posixcafe.org>2023-03-26 01:02:20 +0000
committerJacob Moody <moody@posixcafe.org>2023-03-26 01:02:20 +0000
commit04759ec9af6dcc78ea5873ceaf6db2e3b3920b22 (patch)
tree0e20684eee0f36fff952c46bc14828f1005dfb31 /sys/man
parent2163aebcb85e8214869a2c026b3fc5bd9ddac22c (diff)
runecomp(2)
Diffstat (limited to 'sys/man')
-rw-r--r--sys/man/2/isalpharune6
-rw-r--r--sys/man/2/runecomp116
2 files changed, 121 insertions, 1 deletions
diff --git a/sys/man/2/isalpharune b/sys/man/2/isalpharune
index 1c28369a2..05e8b256c 100644
--- a/sys/man/2/isalpharune
+++ b/sys/man/2/isalpharune
@@ -48,7 +48,11 @@ The names are self-explanatory.
.PP
The case-conversion routines return the character unchanged if it has no case.
.SH SOURCE
-.B /sys/src/libc/port/runetype.c
+.B /sys/src/libc/port/mkrunetype.c
+.br
+.B /sys/src/libc/port/runeistype.c
+.br
+.B /sys/src/libc/port/runetotype.c
.SH "SEE ALSO
.IR ctype (2) ,
.IR "The Unicode Standard" .
diff --git a/sys/man/2/runecomp b/sys/man/2/runecomp
new file mode 100644
index 000000000..e24c59415
--- /dev/null
+++ b/sys/man/2/runecomp
@@ -0,0 +1,116 @@
+.TH RUNECOMP 2
+.SH NAME
+runecomp, runedecomp, fullrunenorm, runegbreak, runewbreak, utfcomp, utfdecomp, fullutfnorm, utfgbreak, utfwbreak \- multi-rune graphemes
+.SH SYNOPSIS
+.ta \w'\fLchar*xx'u
+.B #include <u.h>
+.br
+.B #include <libc.h>
+.PP
+.B
+int runecomp(Rune *dst, Rune *src, int max)
+.PP
+.B
+int runedecomp(Rune *dst, Rune *src, int max)
+.PP
+.B
+Rune* fullrunenorm(Rune *s, int n)
+.PP
+.B
+Rune* runegbreak(Rune *s)
+.PP
+.B
+Rune* runewbreak(Rune *s)
+.PP
+.B
+int utfcomp(char *dst, char *src, int max)
+.PP
+.B
+int utfdecomp(char *dst, char *src, int max)
+.PP
+.B
+char* fullutfnorm(char *s, int n)
+.PP
+.B
+char* utfgbreak(char *s)
+.PP
+.B
+char* utfwbreak(char *s)
+.SH DESCRIPTION
+These routines help in handling
+graphemes that may span multiple runes.
+.PP
+.IR Runecomp ,
+.IR runedecomp ,
+.IR utfcomp ,
+and
+.I utfdecomp
+perform Unicode® normalization on
+.IR src ,
+storing the result in
+.IR dst .
+No more than
+.I max
+elements will be written, and the resulting string
+will always be null terminated. The return value
+is always the total number of elements required to
+store the transformation. If this value is larger
+than the supplied
+.I max
+the caller can assume the result has been truncated.
+.I Runecomp
+and
+.I utfcomp
+perform NFC normalization while
+.I runedecomp
+and
+.I utfdecomp
+perform NFD normalization.
+.PP
+.I Fullrunenorm
+and
+.I fullutfnorm
+determine if enough elements are present in
+.I s
+to perform normalization. If enough are present,
+a pointer is returned to the first element that begins
+the next context. Otherwise
+.I s
+is returned. No more than
+.I n
+elements will be read. In order to find the boundary, the
+first element of the next context must be peeked.
+.PP
+.I Runegbreak
+and
+.I utfgbreak
+search
+.I s
+for the next grapheme break opportunity.
+If none is found before the end of the string,
+.I s
+is returned.
+.PP
+.I Runewbreak
+and
+.I utfwbreak
+search
+.I s
+for the next word break opportunity.
+If none is found before the end of the string,
+.I s
+is returned.
+.SH SOURCE
+.B /sys/src/libc/port/mkrunetype.c
+.br
+.B /sys/src/libc/port/runenorm.c
+.br
+.B /sys/src/libc/port/runebreak.c
+.SH SEE ALSO
+Unicode® Standard Annex #15
+.br
+Unicode® Standard Annex #29
+.br
+.IR rune (2),
+.IR utf (6),
+.IR tcs (1)