diff options
author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 16:53:33 +0300 |
---|---|---|
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 16:53:33 +0300 |
commit | e463eb40363ff4c68b1d903f4e0cdd0ac1c5977f (patch) | |
tree | d5e9f57c28f026cb21de3bd77cc10cd7f64aaa85 /sys/lib/antiword | |
parent | b41b9034225ab3e49980d9de55c141011b6383b0 (diff) |
Import sources from 2011-03-30 iso image - sys/lib
Diffstat (limited to 'sys/lib/antiword')
33 files changed, 8244 insertions, 0 deletions
diff --git a/sys/lib/antiword/8859-1.txt b/sys/lib/antiword/8859-1.txt new file mode 100755 index 000000000..473ecabc1 --- /dev/null +++ b/sys/lib/antiword/8859-1.txt @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-1:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-1:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-1 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL 
LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH 
ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/sys/lib/antiword/8859-10.txt b/sys/lib/antiword/8859-10.txt new file mode 100755 index 000000000..374a42b1a --- /dev/null +++ b/sys/lib/antiword/8859-10.txt @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-10:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.1 +# Table format: Format A +# Date: 1999 October 11 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. 
hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-10:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-10 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-10 order. +# +# Version history +# 1.0 version new. +# 1.1 corrected mistake in mapping of 0xA4 +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xA3 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xA4 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xA5 0x0128 # LATIN CAPITAL LETTER I WITH TILDE +0xA6 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xA7 0x00A7 # SECTION SIGN +0xA8 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xA9 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xAA 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAB 0x0166 # LATIN CAPITAL LETTER T WITH STROKE +0xAC 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xAF 0x014A # LATIN CAPITAL LETTER ENG +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xB2 0x0113 # LATIN SMALL LETTER E WITH MACRON +0xB3 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xB4 0x012B # LATIN SMALL LETTER I WITH MACRON +0xB5 0x0129 # LATIN SMALL LETTER I WITH TILDE +0xB6 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x013C # LATIN SMALL LETTER L WITH CEDILLA 
+0xB9 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xBA 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBB 0x0167 # LATIN SMALL LETTER T WITH STROKE +0xBC 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBD 0x2015 # HORIZONTAL BAR +0xBE 0x016B # LATIN SMALL LETTER U WITH MACRON +0xBF 0x014B # LATIN SMALL LETTER ENG +0xC0 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD2 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x0168 # LATIN CAPITAL LETTER U WITH TILDE +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE1 0x00E1 # LATIN 
SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF2 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x0169 # LATIN SMALL LETTER U WITH TILDE +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x0138 # LATIN SMALL LETTER KRA diff --git a/sys/lib/antiword/8859-13.txt b/sys/lib/antiword/8859-13.txt new file mode 100755 index 000000000..cd11b53fd --- /dev/null +++ b/sys/lib/antiword/8859-13.txt @@ -0,0 +1,299 @@ +# +# Name: ISO/IEC 8859-13:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). 
+# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-13:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-13 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-13 order. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x201D # RIGHT DOUBLE QUOTATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00C6 # LATIN CAPITAL LETTER AE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x201C # LEFT DOUBLE QUOTATION MARK +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x0157 # LATIN SMALL LETTER R WITH CEDILLA +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE 
QUARTERS +0xBF 0x00E6 # LATIN SMALL LETTER AE +0xC0 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xC1 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC2 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC3 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xC7 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xCB 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCC 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xCD 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xCE 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xCF 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xD0 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xD9 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xDA 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xDB 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xDE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xE1 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE2 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE3 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xE7 0x0113 
# LATIN SMALL LETTER E WITH MACRON +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xEB 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xEC 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xED 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xEE 0x012B # LATIN SMALL LETTER I WITH MACRON +0xEF 0x013C # LATIN SMALL LETTER L WITH CEDILLA +0xF0 0x0161 # LATIN SMALL LETTER S WITH CARON +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xF9 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xFA 0x015B # LATIN SMALL LETTER S WITH ACUTE +0xFB 0x016B # LATIN SMALL LETTER U WITH MACRON +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xFE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xFF 0x2019 # RIGHT SINGLE QUOTATION MARK diff --git a/sys/lib/antiword/8859-14.txt b/sys/lib/antiword/8859-14.txt new file mode 100755 index 000000000..36038f413 --- /dev/null +++ b/sys/lib/antiword/8859-14.txt @@ -0,0 +1,301 @@ +# +# Name: ISO/IEC 8859-14:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn <mkuhn@acm.org> +# Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. 
If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-14:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-14 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-14 order. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x1E02 # LATIN CAPITAL LETTER B WITH DOT ABOVE +0xA2 0x1E03 # LATIN SMALL LETTER B WITH DOT ABOVE +0xA3 0x00A3 # POUND SIGN +0xA4 0x010A # LATIN CAPITAL LETTER C WITH DOT ABOVE +0xA5 0x010B # LATIN SMALL LETTER C WITH DOT ABOVE +0xA6 0x1E0A # LATIN CAPITAL LETTER D WITH DOT ABOVE +0xA7 0x00A7 # SECTION SIGN +0xA8 0x1E80 # LATIN CAPITAL LETTER W WITH GRAVE +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x1E82 # LATIN CAPITAL LETTER W WITH ACUTE +0xAB 0x1E0B # LATIN SMALL LETTER D WITH DOT ABOVE +0xAC 0x1EF2 # LATIN CAPITAL LETTER Y WITH GRAVE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xB0 0x1E1E # LATIN CAPITAL LETTER F WITH DOT ABOVE +0xB1 0x1E1F # LATIN SMALL LETTER F WITH DOT ABOVE +0xB2 0x0120 # LATIN CAPITAL LETTER G WITH DOT ABOVE +0xB3 0x0121 # LATIN SMALL LETTER G WITH DOT ABOVE +0xB4 0x1E40 # LATIN CAPITAL LETTER M WITH DOT ABOVE +0xB5 0x1E41 # LATIN SMALL LETTER M WITH DOT ABOVE +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x1E56 # LATIN CAPITAL LETTER P WITH DOT ABOVE +0xB8 0x1E81 # LATIN SMALL LETTER W 
WITH GRAVE +0xB9 0x1E57 # LATIN SMALL LETTER P WITH DOT ABOVE +0xBA 0x1E83 # LATIN SMALL LETTER W WITH ACUTE +0xBB 0x1E60 # LATIN CAPITAL LETTER S WITH DOT ABOVE +0xBC 0x1EF3 # LATIN SMALL LETTER Y WITH GRAVE +0xBD 0x1E84 # LATIN CAPITAL LETTER W WITH DIAERESIS +0xBE 0x1E85 # LATIN SMALL LETTER W WITH DIAERESIS +0xBF 0x1E61 # LATIN SMALL LETTER S WITH DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x0174 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x1E6A # LATIN CAPITAL LETTER T WITH DOT ABOVE +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x0176 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # 
LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x0175 # LATIN SMALL LETTER W WITH CIRCUMFLEX +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x1E6B # LATIN SMALL LETTER T WITH DOT ABOVE +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x0177 # LATIN SMALL LETTER Y WITH CIRCUMFLEX +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + diff --git a/sys/lib/antiword/8859-15.txt b/sys/lib/antiword/8859-15.txt new file mode 100755 index 000000000..1e319707d --- /dev/null +++ b/sys/lib/antiword/8859-15.txt @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-15:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn <mkuhn@acm.org> +# Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1998 - 1999 Unicode, Inc. 
All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-15:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-15 order. +# +# Version history +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x20AC # EURO SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x00BF # INVERTED 
QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL 
LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + diff --git a/sys/lib/antiword/8859-16.txt b/sys/lib/antiword/8859-16.txt new file mode 100755 index 000000000..5353d747b --- /dev/null +++ b/sys/lib/antiword/8859-16.txt @@ -0,0 +1,299 @@ +# +# Name: ISO/IEC 8859-16:2001 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 2001 July 26 +# Authors: Markus Kuhn <mkuhn@acm.org> +# +# Copyright (c) 1999-2001 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. 
+# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-16:2001 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-16 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-16 order. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # 
QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 
0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0105 # LATIN SMALL LETTER A WITH OGONEK 
+0xA3 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xA4 0x20AC # EURO SIGN +0xA5 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x0218 # LATIN CAPITAL LETTER S WITH COMMA BELOW +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xB3 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x201D # RIGHT DOUBLE QUOTATION MARK +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x010D # LATIN SMALL LETTER C WITH CARON +0xBA 0x0219 # LATIN SMALL LETTER S WITH COMMA BELOW +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x0102 # LATIN CAPITAL LETTER A WITH BREVE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 
+0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xD8 0x0170 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xDE 0x021A # LATIN CAPITAL LETTER T WITH COMMA BELOW +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x0103 # LATIN SMALL LETTER A WITH BREVE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x015B 
# LATIN SMALL LETTER S WITH ACUTE +0xF8 0x0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xFE 0x021B # LATIN SMALL LETTER T WITH COMMA BELOW +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/sys/lib/antiword/8859-2.txt b/sys/lib/antiword/8859-2.txt new file mode 100755 index 000000000..e45df25eb --- /dev/null +++ b/sys/lib/antiword/8859-2.txt @@ -0,0 +1,303 @@ +# +# Name: ISO 8859-2:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-2:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-2 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-2 order. 
+# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT 
EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN 
SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x02D8 # BREVE +0xA3 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x013D # LATIN CAPITAL LETTER L WITH CARON +0xA6 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAA 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xAB 0x0164 # LATIN CAPITAL LETTER T WITH CARON +0xAC 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER 
A WITH OGONEK +0xB2 0x02DB # OGONEK +0xB3 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x013E # LATIN SMALL LETTER L WITH CARON +0xB6 0x015B # LATIN SMALL LETTER S WITH ACUTE +0xB7 0x02C7 # CARON +0xB8 0x00B8 # CEDILLA +0xB9 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBA 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xBB 0x0165 # LATIN SMALL LETTER T WITH CARON +0xBC 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xBD 0x02DD # DOUBLE ACUTE ACCENT +0xBE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBF 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 0x0154 # LATIN CAPITAL LETTER R WITH ACUTE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x0102 # LATIN CAPITAL LETTER A WITH BREVE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x0139 # LATIN CAPITAL LETTER L WITH ACUTE +0xC6 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x011A # LATIN CAPITAL LETTER E WITH CARON +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x010E # LATIN CAPITAL LETTER D WITH CARON +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x0147 # LATIN CAPITAL LETTER N WITH CARON +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x0158 # LATIN CAPITAL LETTER R WITH CARON +0xD9 0x016E # LATIN CAPITAL LETTER U WITH RING ABOVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x0170 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xDC 0x00DC # 
LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x0162 # LATIN CAPITAL LETTER T WITH CEDILLA +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x0155 # LATIN SMALL LETTER R WITH ACUTE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x0103 # LATIN SMALL LETTER A WITH BREVE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x013A # LATIN SMALL LETTER L WITH ACUTE +0xE6 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x011B # LATIN SMALL LETTER E WITH CARON +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x010F # LATIN SMALL LETTER D WITH CARON +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x0148 # LATIN SMALL LETTER N WITH CARON +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x0159 # LATIN SMALL LETTER R WITH CARON +0xF9 0x016F # LATIN SMALL LETTER U WITH RING ABOVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x0163 # LATIN SMALL LETTER T WITH CEDILLA +0xFF 0x02D9 # DOT ABOVE diff --git a/sys/lib/antiword/8859-3.txt b/sys/lib/antiword/8859-3.txt new file mode 100755 index 000000000..9b6ac69dd --- /dev/null +++ b/sys/lib/antiword/8859-3.txt @@ -0,0 +1,296 @@ +# +# Name: ISO/IEC 8859-3:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A 
+# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-3:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-3 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-3 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0126 # LATIN CAPITAL LETTER H WITH STROKE +0xA2 0x02D8 # BREVE +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA6 0x0124 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0xAA 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xAB 0x011E # LATIN CAPITAL LETTER G WITH BREVE +0xAC 0x0134 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0xAD 0x00AD # SOFT HYPHEN +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0127 # LATIN SMALL LETTER H WITH STROKE +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x0125 # LATIN SMALL LETTER H WITH CIRCUMFLEX +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x0131 # LATIN SMALL LETTER DOTLESS I +0xBA 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xBB 0x011F # LATIN SMALL LETTER G WITH BREVE +0xBC 0x0135 # LATIN SMALL LETTER J WITH CIRCUMFLEX +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBF 0x017C # LATIN SMALL LETTER Z WITH 
DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x010A # LATIN CAPITAL LETTER C WITH DOT ABOVE +0xC6 0x0108 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0120 # LATIN CAPITAL LETTER G WITH DOT ABOVE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x011C # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x016C # LATIN CAPITAL LETTER U WITH BREVE +0xDE 0x015C # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x010B # LATIN SMALL LETTER C WITH DOT ABOVE +0xE6 0x0109 # LATIN SMALL LETTER C WITH CIRCUMFLEX +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL 
LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0121 # LATIN SMALL LETTER G WITH DOT ABOVE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x011D # LATIN SMALL LETTER G WITH CIRCUMFLEX +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x016D # LATIN SMALL LETTER U WITH BREVE +0xFE 0x015D # LATIN SMALL LETTER S WITH CIRCUMFLEX +0xFF 0x02D9 # DOT ABOVE diff --git a/sys/lib/antiword/8859-4.txt b/sys/lib/antiword/8859-4.txt new file mode 100755 index 000000000..662e698ab --- /dev/null +++ b/sys/lib/antiword/8859-4.txt @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-4:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. 
hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-4:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-4 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-4 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0138 # LATIN SMALL LETTER KRA +0xA3 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x0128 # LATIN CAPITAL LETTER I WITH TILDE +0xA6 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAA 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xAB 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xAC 0x0166 # LATIN CAPITAL LETTER T WITH STROKE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xB2 0x02DB # OGONEK +0xB3 0x0157 # LATIN SMALL LETTER R WITH CEDILLA +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x0129 # LATIN SMALL LETTER I WITH TILDE +0xB6 0x013C # LATIN SMALL LETTER L WITH CEDILLA +0xB7 0x02C7 # CARON +0xB8 0x00B8 # CEDILLA +0xB9 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBA 0x0113 # LATIN SMALL LETTER E WITH MACRON +0xBB 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xBC 
0x0167 # LATIN SMALL LETTER T WITH STROKE +0xBD 0x014A # LATIN CAPITAL LETTER ENG +0xBE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBF 0x014B # LATIN SMALL LETTER ENG +0xC0 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD2 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD3 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0168 # LATIN CAPITAL LETTER U WITH TILDE +0xDE 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH 
DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x012B # LATIN SMALL LETTER I WITH MACRON +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF2 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF3 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0169 # LATIN SMALL LETTER U WITH TILDE +0xFE 0x016B # LATIN SMALL LETTER U WITH MACRON +0xFF 0x02D9 # DOT ABOVE diff --git a/sys/lib/antiword/8859-5.txt b/sys/lib/antiword/8859-5.txt new file mode 100755 index 000000000..a7ed1ce2a --- /dev/null +++ b/sys/lib/antiword/8859-5.txt @@ -0,0 +1,303 @@ +# +# Name: ISO 8859-5:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. 
If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-5:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-5 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-5 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0401 # CYRILLIC CAPITAL LETTER IO +0xA2 0x0402 # CYRILLIC CAPITAL LETTER DJE +0xA3 0x0403 # CYRILLIC CAPITAL LETTER GJE +0xA4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xA5 0x0405 # CYRILLIC CAPITAL LETTER DZE +0xA6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 0x0407 # CYRILLIC CAPITAL LETTER YI +0xA8 0x0408 # CYRILLIC CAPITAL LETTER JE +0xA9 0x0409 # CYRILLIC CAPITAL LETTER LJE +0xAA 0x040A # CYRILLIC CAPITAL LETTER NJE +0xAB 0x040B # CYRILLIC CAPITAL LETTER TSHE +0xAC 0x040C # CYRILLIC CAPITAL LETTER KJE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x040E # CYRILLIC CAPITAL LETTER SHORT U +0xAF 0x040F # CYRILLIC CAPITAL LETTER DZHE +0xB0 0x0410 # CYRILLIC CAPITAL LETTER A +0xB1 0x0411 # CYRILLIC CAPITAL LETTER BE +0xB2 0x0412 # CYRILLIC CAPITAL LETTER VE +0xB3 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xB4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xB5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xB6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xB7 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xB8 0x0418 # CYRILLIC CAPITAL LETTER I +0xB9 0x0419 # CYRILLIC CAPITAL LETTER SHORT I 
+0xBA 0x041A # CYRILLIC CAPITAL LETTER KA +0xBB 0x041B # CYRILLIC CAPITAL LETTER EL +0xBC 0x041C # CYRILLIC CAPITAL LETTER EM +0xBD 0x041D # CYRILLIC CAPITAL LETTER EN +0xBE 0x041E # CYRILLIC CAPITAL LETTER O +0xBF 0x041F # CYRILLIC CAPITAL LETTER PE +0xC0 0x0420 # CYRILLIC CAPITAL LETTER ER +0xC1 0x0421 # CYRILLIC CAPITAL LETTER ES +0xC2 0x0422 # CYRILLIC CAPITAL LETTER TE +0xC3 0x0423 # CYRILLIC CAPITAL LETTER U +0xC4 0x0424 # CYRILLIC CAPITAL LETTER EF +0xC5 0x0425 # CYRILLIC CAPITAL LETTER HA +0xC6 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xC7 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xC8 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xC9 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xCA 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN +0xCB 0x042B # CYRILLIC CAPITAL LETTER YERU +0xCC 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xCD 0x042D # CYRILLIC CAPITAL LETTER E +0xCE 0x042E # CYRILLIC CAPITAL LETTER YU +0xCF 0x042F # CYRILLIC CAPITAL LETTER YA +0xD0 0x0430 # CYRILLIC SMALL LETTER A +0xD1 0x0431 # CYRILLIC SMALL LETTER BE +0xD2 0x0432 # CYRILLIC SMALL LETTER VE +0xD3 0x0433 # CYRILLIC SMALL LETTER GHE +0xD4 0x0434 # CYRILLIC SMALL LETTER DE +0xD5 0x0435 # CYRILLIC SMALL LETTER IE +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0437 # CYRILLIC SMALL LETTER ZE +0xD8 0x0438 # CYRILLIC SMALL LETTER I +0xD9 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xDA 0x043A # CYRILLIC SMALL LETTER KA +0xDB 0x043B # CYRILLIC SMALL LETTER EL +0xDC 0x043C # CYRILLIC SMALL LETTER EM +0xDD 0x043D # CYRILLIC SMALL LETTER EN +0xDE 0x043E # CYRILLIC SMALL LETTER O +0xDF 0x043F # CYRILLIC SMALL LETTER PE +0xE0 0x0440 # CYRILLIC SMALL LETTER ER +0xE1 0x0441 # CYRILLIC SMALL LETTER ES +0xE2 0x0442 # CYRILLIC SMALL LETTER TE +0xE3 0x0443 # CYRILLIC SMALL LETTER U +0xE4 0x0444 # CYRILLIC SMALL LETTER EF +0xE5 0x0445 # CYRILLIC SMALL LETTER HA +0xE6 0x0446 # CYRILLIC SMALL LETTER TSE +0xE7 0x0447 # CYRILLIC SMALL LETTER CHE +0xE8 0x0448 # CYRILLIC SMALL LETTER SHA +0xE9 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xEA 
0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xEB 0x044B # CYRILLIC SMALL LETTER YERU +0xEC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xED 0x044D # CYRILLIC SMALL LETTER E +0xEE 0x044E # CYRILLIC SMALL LETTER YU +0xEF 0x044F # CYRILLIC SMALL LETTER YA +0xF0 0x2116 # NUMERO SIGN +0xF1 0x0451 # CYRILLIC SMALL LETTER IO +0xF2 0x0452 # CYRILLIC SMALL LETTER DJE +0xF3 0x0453 # CYRILLIC SMALL LETTER GJE +0xF4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xF5 0x0455 # CYRILLIC SMALL LETTER DZE +0xF6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xF7 0x0457 # CYRILLIC SMALL LETTER YI +0xF8 0x0458 # CYRILLIC SMALL LETTER JE +0xF9 0x0459 # CYRILLIC SMALL LETTER LJE +0xFA 0x045A # CYRILLIC SMALL LETTER NJE +0xFB 0x045B # CYRILLIC SMALL LETTER TSHE +0xFC 0x045C # CYRILLIC SMALL LETTER KJE +0xFD 0x00A7 # SECTION SIGN +0xFE 0x045E # CYRILLIC SMALL LETTER SHORT U +0xFF 0x045F # CYRILLIC SMALL LETTER DZHE diff --git a/sys/lib/antiword/8859-6.txt b/sys/lib/antiword/8859-6.txt new file mode 100755 index 000000000..69ac7f589 --- /dev/null +++ b/sys/lib/antiword/8859-6.txt @@ -0,0 +1,260 @@ +# +# Name: ISO 8859-6:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. 
hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-6:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-6 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-6 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# 0x30..0x39 remapped to the ASCII digits (U+0030..U+0039) instead +# of the Arabic digits (U+0660..U+0669). +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA4 0x00A4 # CURRENCY SIGN +0xAC 0x060C # ARABIC COMMA +0xAD 0x00AD # SOFT HYPHEN +0xBB 0x061B # ARABIC SEMICOLON +0xBF 0x061F # ARABIC QUESTION MARK +0xC1 0x0621 # ARABIC LETTER HAMZA +0xC2 0x0622 # ARABIC LETTER ALEF WITH MADDA ABOVE +0xC3 0x0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE +0xC4 0x0624 # ARABIC LETTER WAW WITH HAMZA ABOVE +0xC5 0x0625 # ARABIC LETTER ALEF WITH HAMZA BELOW +0xC6 0x0626 # ARABIC LETTER YEH WITH HAMZA ABOVE +0xC7 0x0627 # ARABIC LETTER ALEF +0xC8 0x0628 # ARABIC LETTER BEH +0xC9 0x0629 # ARABIC LETTER TEH MARBUTA +0xCA 0x062A # ARABIC LETTER TEH +0xCB 0x062B # ARABIC LETTER THEH +0xCC 0x062C # ARABIC LETTER JEEM +0xCD 0x062D # ARABIC LETTER HAH +0xCE 0x062E # ARABIC LETTER KHAH +0xCF 0x062F # ARABIC LETTER DAL +0xD0 0x0630 # ARABIC LETTER THAL +0xD1 0x0631 # ARABIC LETTER REH +0xD2 0x0632 # ARABIC LETTER ZAIN +0xD3 0x0633 # ARABIC LETTER SEEN +0xD4 0x0634 # ARABIC LETTER SHEEN +0xD5 0x0635 # ARABIC LETTER SAD +0xD6 0x0636 # ARABIC LETTER DAD +0xD7 0x0637 # ARABIC LETTER TAH +0xD8 0x0638 # ARABIC LETTER ZAH +0xD9 0x0639 # ARABIC LETTER AIN 
+0xDA 0x063A # ARABIC LETTER GHAIN +0xE0 0x0640 # ARABIC TATWEEL +0xE1 0x0641 # ARABIC LETTER FEH +0xE2 0x0642 # ARABIC LETTER QAF +0xE3 0x0643 # ARABIC LETTER KAF +0xE4 0x0644 # ARABIC LETTER LAM +0xE5 0x0645 # ARABIC LETTER MEEM +0xE6 0x0646 # ARABIC LETTER NOON +0xE7 0x0647 # ARABIC LETTER HEH +0xE8 0x0648 # ARABIC LETTER WAW +0xE9 0x0649 # ARABIC LETTER ALEF MAKSURA +0xEA 0x064A # ARABIC LETTER YEH +0xEB 0x064B # ARABIC FATHATAN +0xEC 0x064C # ARABIC DAMMATAN +0xED 0x064D # ARABIC KASRATAN +0xEE 0x064E # ARABIC FATHA +0xEF 0x064F # ARABIC DAMMA +0xF0 0x0650 # ARABIC KASRA +0xF1 0x0651 # ARABIC SHADDA +0xF2 0x0652 # ARABIC SUKUN diff --git a/sys/lib/antiword/8859-7.txt b/sys/lib/antiword/8859-7.txt new file mode 100755 index 000000000..52c42d08a --- /dev/null +++ b/sys/lib/antiword/8859-7.txt @@ -0,0 +1,302 @@ +# +# Name: ISO 8859-7:1987 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO 8859-7:1987 characters map into Unicode. 
+# +# ISO 8859-7:1987 is equivalent to ISO-IR-126, ELOT 928, +# and ECMA 118. +# +# Format: Three tab-separated columns +# Column #1 is the ISO 8859-7 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO 8859-7 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# Remap 0xA1 to U+2018 (instead of 0x02BD) to match text of 8859-7 +# Remap 0xA2 to U+2019 (instead of 0x02BC) to match text of 8859-7 +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 
0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL 
LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x2018 # LEFT SINGLE QUOTATION MARK +0xA2 0x2019 # RIGHT SINGLE QUOTATION MARK +0xA3 0x00A3 # POUND SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAB 
0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAF 0x2015 # HORIZONTAL BAR +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x0384 # GREEK TONOS +0xB5 0x0385 # GREEK DIALYTIKA TONOS +0xB6 0x0386 # GREEK CAPITAL LETTER ALPHA WITH TONOS +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x0388 # GREEK CAPITAL LETTER EPSILON WITH TONOS +0xB9 0x0389 # GREEK CAPITAL LETTER ETA WITH TONOS +0xBA 0x038A # GREEK CAPITAL LETTER IOTA WITH TONOS +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x038C # GREEK CAPITAL LETTER OMICRON WITH TONOS +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x038E # GREEK CAPITAL LETTER UPSILON WITH TONOS +0xBF 0x038F # GREEK CAPITAL LETTER OMEGA WITH TONOS +0xC0 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xC1 0x0391 # GREEK CAPITAL LETTER ALPHA +0xC2 0x0392 # GREEK CAPITAL LETTER BETA +0xC3 0x0393 # GREEK CAPITAL LETTER GAMMA +0xC4 0x0394 # GREEK CAPITAL LETTER DELTA +0xC5 0x0395 # GREEK CAPITAL LETTER EPSILON +0xC6 0x0396 # GREEK CAPITAL LETTER ZETA +0xC7 0x0397 # GREEK CAPITAL LETTER ETA +0xC8 0x0398 # GREEK CAPITAL LETTER THETA +0xC9 0x0399 # GREEK CAPITAL LETTER IOTA +0xCA 0x039A # GREEK CAPITAL LETTER KAPPA +0xCB 0x039B # GREEK CAPITAL LETTER LAMDA +0xCC 0x039C # GREEK CAPITAL LETTER MU +0xCD 0x039D # GREEK CAPITAL LETTER NU +0xCE 0x039E # GREEK CAPITAL LETTER XI +0xCF 0x039F # GREEK CAPITAL LETTER OMICRON +0xD0 0x03A0 # GREEK CAPITAL LETTER PI +0xD1 0x03A1 # GREEK CAPITAL LETTER RHO +0xD3 0x03A3 # GREEK CAPITAL LETTER SIGMA +0xD4 0x03A4 # GREEK CAPITAL LETTER TAU +0xD5 0x03A5 # GREEK CAPITAL LETTER UPSILON +0xD6 0x03A6 # GREEK CAPITAL LETTER PHI +0xD7 0x03A7 # GREEK CAPITAL LETTER CHI +0xD8 0x03A8 # GREEK CAPITAL LETTER PSI +0xD9 0x03A9 # GREEK CAPITAL LETTER OMEGA +0xDA 0x03AA # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xDB 0x03AB # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xDC 0x03AC # 
GREEK SMALL LETTER ALPHA WITH TONOS +0xDD 0x03AD # GREEK SMALL LETTER EPSILON WITH TONOS +0xDE 0x03AE # GREEK SMALL LETTER ETA WITH TONOS +0xDF 0x03AF # GREEK SMALL LETTER IOTA WITH TONOS +0xE0 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xE1 0x03B1 # GREEK SMALL LETTER ALPHA +0xE2 0x03B2 # GREEK SMALL LETTER BETA +0xE3 0x03B3 # GREEK SMALL LETTER GAMMA +0xE4 0x03B4 # GREEK SMALL LETTER DELTA +0xE5 0x03B5 # GREEK SMALL LETTER EPSILON +0xE6 0x03B6 # GREEK SMALL LETTER ZETA +0xE7 0x03B7 # GREEK SMALL LETTER ETA +0xE8 0x03B8 # GREEK SMALL LETTER THETA +0xE9 0x03B9 # GREEK SMALL LETTER IOTA +0xEA 0x03BA # GREEK SMALL LETTER KAPPA +0xEB 0x03BB # GREEK SMALL LETTER LAMDA +0xEC 0x03BC # GREEK SMALL LETTER MU +0xED 0x03BD # GREEK SMALL LETTER NU +0xEE 0x03BE # GREEK SMALL LETTER XI +0xEF 0x03BF # GREEK SMALL LETTER OMICRON +0xF0 0x03C0 # GREEK SMALL LETTER PI +0xF1 0x03C1 # GREEK SMALL LETTER RHO +0xF2 0x03C2 # GREEK SMALL LETTER FINAL SIGMA +0xF3 0x03C3 # GREEK SMALL LETTER SIGMA +0xF4 0x03C4 # GREEK SMALL LETTER TAU +0xF5 0x03C5 # GREEK SMALL LETTER UPSILON +0xF6 0x03C6 # GREEK SMALL LETTER PHI +0xF7 0x03C7 # GREEK SMALL LETTER CHI +0xF8 0x03C8 # GREEK SMALL LETTER PSI +0xF9 0x03C9 # GREEK SMALL LETTER OMEGA +0xFA 0x03CA # GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xFB 0x03CB # GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xFC 0x03CC # GREEK SMALL LETTER OMICRON WITH TONOS +0xFD 0x03CD # GREEK SMALL LETTER UPSILON WITH TONOS +0xFE 0x03CE # GREEK SMALL LETTER OMEGA WITH TONOS diff --git a/sys/lib/antiword/8859-8.txt b/sys/lib/antiword/8859-8.txt new file mode 100755 index 000000000..bc8da4c7f --- /dev/null +++ b/sys/lib/antiword/8859-8.txt @@ -0,0 +1,270 @@ +# +# Name: ISO/IEC 8859-8:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.1 +# Table format: Format A +# Date: 2000-Jan-03 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. 
(The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-8:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-8 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-8 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# 1.1 version updates to the published 8859-8:1999, correcting +# the mapping of 0xAF and adding mappings for LRM and RLM. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00D7 # MULTIPLICATION SIGN +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00F7 # DIVISION SIGN +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xDF 0x2017 # DOUBLE LOW LINE +0xE0 0x05D0 # HEBREW LETTER ALEF +0xE1 0x05D1 # HEBREW LETTER BET +0xE2 0x05D2 # HEBREW LETTER GIMEL +0xE3 0x05D3 # HEBREW LETTER DALET +0xE4 
0x05D4 # HEBREW LETTER HE +0xE5 0x05D5 # HEBREW LETTER VAV +0xE6 0x05D6 # HEBREW LETTER ZAYIN +0xE7 0x05D7 # HEBREW LETTER HET +0xE8 0x05D8 # HEBREW LETTER TET +0xE9 0x05D9 # HEBREW LETTER YOD +0xEA 0x05DA # HEBREW LETTER FINAL KAF +0xEB 0x05DB # HEBREW LETTER KAF +0xEC 0x05DC # HEBREW LETTER LAMED +0xED 0x05DD # HEBREW LETTER FINAL MEM +0xEE 0x05DE # HEBREW LETTER MEM +0xEF 0x05DF # HEBREW LETTER FINAL NUN +0xF0 0x05E0 # HEBREW LETTER NUN +0xF1 0x05E1 # HEBREW LETTER SAMEKH +0xF2 0x05E2 # HEBREW LETTER AYIN +0xF3 0x05E3 # HEBREW LETTER FINAL PE +0xF4 0x05E4 # HEBREW LETTER PE +0xF5 0x05E5 # HEBREW LETTER FINAL TSADI +0xF6 0x05E6 # HEBREW LETTER TSADI +0xF7 0x05E7 # HEBREW LETTER QOF +0xF8 0x05E8 # HEBREW LETTER RESH +0xF9 0x05E9 # HEBREW LETTER SHIN +0xFA 0x05EA # HEBREW LETTER TAV +0xFD 0x200E # LEFT-TO-RIGHT MARK +0xFE 0x200F # RIGHT-TO-LEFT MARK + diff --git a/sys/lib/antiword/8859-9.txt b/sys/lib/antiword/8859-9.txt new file mode 100755 index 000000000..22901f107 --- /dev/null +++ b/sys/lib/antiword/8859-9.txt @@ -0,0 +1,307 @@ +# +# Name: ISO/IEC 8859-9:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on magnetic media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. 
hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-9:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-9 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-9 order. +# +# ISO/IEC 8859-9 is also equivalent to ISO-IR-148. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN 
CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL 
LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0081 # <control> +0x82 0x0082 # <control> +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0085 # <control> +0x86 0x0086 # <control> +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x008C # <control> +0x8D 0x008D # <control> +0x8E 0x008E # <control> +0x8F 0x008F # <control> +0x90 0x0090 # <control> +0x91 0x0091 # <control> +0x92 0x0092 # <control> +0x93 0x0093 # <control> +0x94 0x0094 # <control> +0x95 0x0095 # <control> +0x96 0x0096 # <control> +0x97 0x0097 # <control> +0x98 0x0098 # <control> +0x99 0x0099 # <control> +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x009C # <control> +0x9D 0x009D # <control> +0x9E 0x009E # <control> +0x9F 0x009F # <control> +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL 
LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x011E # LATIN CAPITAL LETTER G WITH BREVE +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0xDE 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 
0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x011F # LATIN SMALL LETTER G WITH BREVE +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0131 # LATIN SMALL LETTER DOTLESS I +0xFE 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + + diff --git a/sys/lib/antiword/Default b/sys/lib/antiword/Default new file mode 100755 index 000000000..c16f39a99 --- /dev/null +++ b/sys/lib/antiword/Default @@ -0,0 +1,81 @@ +# Default fontnames translation table +# uses only fonts present in the RISC OS 3 ROMs +# +# MS-Word fontname, Italic, Bold, Acorn fontname, Special +Arial, 0, 0, Homerton.Medium, 0 +Arial, 0, 1, Homerton.Bold, 0 +Arial, 1, 0, Homerton.Medium.Oblique,0 +Arial, 1, 1, Homerton.Bold.Oblique, 0 +Arial Black, 0, 0, Homerton.Medium, 0 +Arial Black, 0, 1, Homerton.Bold, 0 +Arial Black, 1, 0, Homerton.Medium.Oblique,0 +Arial Black, 1, 1, Homerton.Bold.Oblique, 0 +Arial CE, 0, 0, Homerton.Medium, 0 +Arial CE, 0, 1, Homerton.Bold, 0 +Arial CE, 1, 0, Homerton.Medium.Oblique,0 +Arial CE, 1, 1, Homerton.Bold.Oblique, 0 +Arial Narrow, 0, 0, Homerton.Medium, 0 +Arial Narrow, 0, 1, Homerton.Bold, 0 +Arial Narrow, 1, 0, Homerton.Medium.Oblique,0 +Arial Narrow, 1, 1, 
Homerton.Bold.Oblique, 0 +Comic Sans MS, 0, 0, Homerton.Medium, 0 +Comic Sans MS, 0, 1, Homerton.Bold, 0 +Comic Sans MS, 1, 0, Homerton.Medium.Oblique,0 +Comic Sans MS, 1, 1, Homerton.Bold.Oblique, 0 +Courier, 0, 0, Corpus.Medium, 0 +Courier, 0, 1, Corpus.Bold, 0 +Courier, 1, 0, Corpus.Medium.Oblique, 0 +Courier, 1, 1, Corpus.Bold.Oblique, 0 +Courier New, 0, 0, Corpus.Medium, 0 +Courier New, 0, 1, Corpus.Bold, 0 +Courier New, 1, 0, Corpus.Medium.Oblique, 0 +Courier New, 1, 1, Corpus.Bold.Oblique, 0 +Fixedsys, 0, 0, Corpus.Medium, 0 +Fixedsys, 0, 1, Corpus.Bold, 0 +Fixedsys, 1, 0, Corpus.Medium.Oblique, 0 +Fixedsys, 1, 1, Corpus.Bold.Oblique, 0 +Helvetica, 0, 0, Homerton.Medium, 0 +Helvetica, 0, 1, Homerton.Bold, 0 +Helvetica, 1, 0, Homerton.Medium.Oblique,0 +Helvetica, 1, 1, Homerton.Bold.Oblique, 0 +Helvetica-Narrow, 0, 0, Homerton.Medium, 0 +Helvetica-Narrow, 0, 1, Homerton.Bold, 0 +Helvetica-Narrow, 1, 0, Homerton.Medium.Oblique,0 +Helvetica-Narrow, 1, 1, Homerton.Bold.Oblique, 0 +Lucida Console, 0, 0, Corpus.Medium, 0 +Lucida Console, 0, 1, Corpus.Bold, 0 +Lucida Console, 1, 0, Corpus.Medium.Oblique, 0 +Lucida Console, 1, 1, Corpus.Bold.Oblique, 0 +Monotype.com, 0, 0, Corpus.Medium, 0 +Monotype.com, 0, 1, Corpus.Bold, 0 +Monotype.com, 1, 0, Corpus.Medium.Oblique, 0 +Monotype.com, 1, 1, Corpus.Bold.Oblique, 0 +MS Sans Serif, 0, 0, Homerton.Medium, 0 +MS Sans Serif, 0, 1, Homerton.Bold, 0 +MS Sans Serif, 1, 0, Homerton.Medium.Oblique,0 +MS Sans Serif, 1, 1, Homerton.Bold.Oblique, 0 +Swiss, 0, 0, Homerton.Medium, 0 +Swiss, 0, 1, Homerton.Bold, 0 +Swiss, 1, 0, Homerton.Medium.Oblique,0 +Swiss, 1, 1, Homerton.Bold.Oblique, 0 +Tahoma, 0, 0, Homerton.Medium, 0 +Tahoma, 0, 1, Homerton.Bold, 0 +Tahoma, 1, 0, Homerton.Medium.Oblique,0 +Tahoma, 1, 1, Homerton.Bold.Oblique, 0 +Trebuchet MS, 0, 0, Homerton.Medium, 0 +Trebuchet MS, 0, 1, Homerton.Bold, 0 +Trebuchet MS, 1, 0, Homerton.Medium.Oblique,0 +Trebuchet MS, 1, 1, Homerton.Bold.Oblique, 0 +Verdana, 0, 0, 
Homerton.Medium, 0 +Verdana, 0, 1, Homerton.Bold, 0 +Verdana, 1, 0, Homerton.Medium.Oblique,0 +Verdana, 1, 1, Homerton.Bold.Oblique, 0 +Univers, 0, 0, Homerton.Medium, 0 +Univers, 0, 1, Homerton.Bold, 0 +Univers, 1, 0, Homerton.Medium.Oblique,0 +Univers, 1, 1, Homerton.Bold.Oblique, 0 +# All the other fonts +*, 0, 0, Trinity.Medium, 0 +*, 0, 1, Trinity.Bold, 0 +*, 1, 0, Trinity.Medium.Italic, 0 +*, 1, 1, Trinity.Bold.Italic, 0 diff --git a/sys/lib/antiword/Example b/sys/lib/antiword/Example new file mode 100755 index 000000000..43d2ee21e --- /dev/null +++ b/sys/lib/antiword/Example @@ -0,0 +1,80 @@ +# An example of a fontnames translation table +# +# MS-Word fontname, Italic, Bold, Acorn fontname, Special +Arial, 0, 0, Homerton.Medium, 0 +Arial, 0, 1, Homerton.Bold, 0 +Arial, 1, 0, Homerton.Medium.Oblique,0 +Arial, 1, 1, Homerton.Bold.Oblique, 0 +Arial Black, 0, 0, Homerton.Medium, 0 +Arial Black, 0, 1, Homerton.Bold, 0 +Arial Black, 1, 0, Homerton.Medium.Oblique,0 +Arial Black, 1, 1, Homerton.Bold.Oblique, 0 +AvantGarde, 0, 0, Clare.Medium, 0 +AvantGarde, 0, 1, Clare.Demi, 0 +AvantGarde, 1, 0, Clare.Medium.Oblique, 0 +AvantGarde, 1, 1, Clare.Demi.Oblique, 0 +Bookman, 0, 0, Robinson.Light, 0 +Bookman, 0, 1, Robinson.Demi, 0 +Bookman, 1, 0, Robinson.Light.Italic, 0 +Bookman, 1, 1, Robinson.Demi.Italic, 0 +Bookman Old Style, 0, 0, Robinson.Light, 0 +Bookman Old Style, 0, 1, Robinson.Demi, 0 +Bookman Old Style, 1, 0, Robinson.Light.Italic, 0 +Bookman Old Style, 1, 1, Robinson.Demi.Italic, 0 +Courier, 0, 0, Corpus.Medium, 0 +Courier, 0, 1, Corpus.Bold, 0 +Courier, 1, 0, Corpus.Medium.Oblique, 0 +Courier, 1, 1, Corpus.Bold.Oblique, 0 +Courier New, 0, 0, Corpus.Medium, 0 +Courier New, 0, 1, Corpus.Bold, 0 +Courier New, 1, 0, Corpus.Medium.Oblique, 0 +Courier New, 1, 1, Corpus.Bold.Oblique, 0 +Fixedsys, 0, 0, Corpus.Medium, 0 +Fixedsys, 0, 1, Corpus.Bold, 0 +Fixedsys, 1, 0, Corpus.Medium.Oblique, 0 +Fixedsys, 1, 1, Corpus.Bold.Oblique, 0 +Helvetica, 0, 0, Homerton.Medium, 
0 +Helvetica, 0, 1, Homerton.Bold, 0 +Helvetica, 1, 0, Homerton.Medium.Oblique,0 +Helvetica, 1, 1, Homerton.Bold.Oblique, 0 +Lucida Console, 0, 0, Corpus.Medium, 0 +Lucida Console, 0, 1, Corpus.Bold, 0 +Lucida Console, 1, 0, Corpus.Medium.Oblique, 0 +Lucida Console, 1, 1, Corpus.Bold.Oblique, 0 +Palatino, 0, 0, Pembroke.Medium, 0 +Palatino, 0, 1, Pembroke.Bold, 0 +Palatino, 1, 0, Pembroke.Medium.Italic, 0 +Palatino, 1, 1, Pembroke.Bold.Italic, 0 +Swiss, 0, 0, Homerton.Medium, 0 +Swiss, 0, 1, Homerton.Bold, 0 +Swiss, 1, 0, Homerton.Medium.Oblique,0 +Swiss, 1, 1, Homerton.Bold.Oblique, 0 +Symbol, 0, 0, Sidney, 1 +Symbol, 0, 1, Sidney, 1 +Symbol, 1, 0, Sidney, 1 +Symbol, 1, 1, Sidney, 1 +Times, 0, 0, Trinity.Medium, 0 +Times, 0, 1, Trinity.Bold, 0 +Times, 1, 0, Trinity.Medium.Italic, 0 +Times, 1, 1, Trinity.Bold.Italic, 0 +Times New Roman, 0, 0, Trinity.Medium, 0 +Times New Roman, 0, 1, Trinity.Bold, 0 +Times New Roman, 1, 0, Trinity.Medium.Italic, 0 +Times New Roman, 1, 1, Trinity.Bold.Italic, 0 +Times Roman, 0, 0, Trinity.Medium, 0 +Times Roman, 0, 1, Trinity.Bold, 0 +Times Roman, 1, 0, Trinity.Medium.Italic, 0 +Times Roman, 1, 1, Trinity.Bold.Italic, 0 +Univers, 0, 0, Homerton.Medium, 0 +Univers, 0, 1, Homerton.Bold, 0 +Univers, 1, 0, Homerton.Medium.Oblique,0 +Univers, 1, 1, Homerton.Bold.Oblique, 0 +ZapfDingbats, 0, 0, Selwyn, 2 +ZapfDingbats, 0, 1, Selwyn, 2 +ZapfDingbats, 1, 0, Selwyn, 2 +ZapfDingbats, 1, 1, Selwyn, 2 +# All the other fonts +*, 0, 0, Trinity.Medium, 0 +*, 0, 1, Trinity.Bold, 0 +*, 1, 0, Trinity.Medium.Italic, 0 +*, 1, 1, Trinity.Bold.Italic, 0 diff --git a/sys/lib/antiword/MacRoman.txt b/sys/lib/antiword/MacRoman.txt new file mode 100755 index 000000000..8821f3b43 --- /dev/null +++ b/sys/lib/antiword/MacRoman.txt @@ -0,0 +1,364 @@ +#======================================================================= +# FTP file name: ROMAN.TXT +# +# Contents: Map (external version) from Mac OS Roman +# character set to Unicode 2.1 +# +# Copyright: (c) 
1994-1999 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom<b4>, ufrm<b3>, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to +# EURO SIGN. Matches internal utom<b3>, +# ufrm<b3>. +# n08 1998-Feb-05 Minor update to header comments +# n06 1997-Dec-14 Add warning about future changes to 0xDB +# from CURRENCY SIGN to EURO SIGN. Clarify +# some header information +# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm<n9>. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple makes no warranty or representation, either express or +# implied, with respect to these tables, their quality, accuracy, or +# fitness for a particular purpose. In no event will Apple be liable +# for direct, indirect, special, incidental, or consequential damages +# resulting from any defect or inaccuracy in this document or the +# accompanying tables. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/APPLE/> +# <ftp://dev.apple.com/devworld/Technical_Documentation/Misc._Standards/> +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". 
+# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Roman code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Roman code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Roman character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Roman: +# ---------------------- +# +# This character set is used for at least the following Mac OS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of Mac OS Roman are used for Croatian, Icelandic, +# Turkish, Romanian, and other encodings. Separate mapping tables +# are available for these encodings. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Roman encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# Note: U+20AC is new with Unicode 2.1; for earlier Unicode +# versions, Mac OS Roman 0xDB may be mapped to private-use character +# U+F8A0. +# +# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago, +# New York, Geneva, and Monaco did not implement the full Mac OS +# Roman character set; they only supported character codes up to +# 0xD8. 
The TrueType versions of these fonts have always implemented +# the full character set, as with the bitmap and TrueType versions +# of the other standard Roman fonts. +# +# In all Mac OS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n08 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n04: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. 
+# +################## + +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET 
+0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +# +0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX 
+0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 0x2020 # DAGGER +0xA1 0x00B0 # DEGREE SIGN +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A7 # SECTION SIGN +0xA5 0x2022 # BULLET +0xA6 0x00B6 # PILCROW SIGN +0xA7 0x00DF # LATIN SMALL LETTER SHARP S +0xA8 0x00AE # REGISTERED SIGN +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x2122 # TRADE MARK SIGN +0xAB 0x00B4 # ACUTE ACCENT +0xAC 0x00A8 # DIAERESIS +0xAD 0x2260 # NOT EQUAL TO +0xAE 0x00C6 # LATIN CAPITAL LETTER AE +0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xB0 0x221E # INFINITY +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x2264 # LESS-THAN OR EQUAL TO +0xB3 0x2265 # GREATER-THAN OR EQUAL TO +0xB4 0x00A5 # YEN SIGN +0xB5 0x00B5 # MICRO SIGN +0xB6 0x2202 # PARTIAL DIFFERENTIAL +0xB7 0x2211 # N-ARY SUMMATION +0xB8 0x220F # N-ARY PRODUCT +0xB9 0x03C0 # GREEK SMALL LETTER PI +0xBA 0x222B # INTEGRAL +0xBB 0x00AA # FEMININE ORDINAL INDICATOR +0xBC 0x00BA # MASCULINE ORDINAL INDICATOR +0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA +0xBE 0x00E6 # LATIN SMALL LETTER AE +0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xC0 0x00BF # INVERTED QUESTION MARK +0xC1 0x00A1 # INVERTED EXCLAMATION MARK +0xC2 0x00AC # NOT SIGN +0xC3 0x221A # SQUARE ROOT +0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK +0xC5 0x2248 # ALMOST EQUAL TO +0xC6 0x2206 # 
INCREMENT +0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC9 0x2026 # HORIZONTAL ELLIPSIS +0xCA 0x00A0 # NO-BREAK SPACE +0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xCE 0x0152 # LATIN CAPITAL LIGATURE OE +0xCF 0x0153 # LATIN SMALL LIGATURE OE +0xD0 0x2013 # EN DASH +0xD1 0x2014 # EM DASH +0xD2 0x201C # LEFT DOUBLE QUOTATION MARK +0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK +0xD4 0x2018 # LEFT SINGLE QUOTATION MARK +0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK +0xD6 0x00F7 # DIVISION SIGN +0xD7 0x25CA # LOZENGE +0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS +0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xDA 0x2044 # FRACTION SLASH +0xDB 0x20AC # EURO SIGN +0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xDE 0xFB01 # LATIN SMALL LIGATURE FI +0xDF 0xFB02 # LATIN SMALL LIGATURE FL +0xE0 0x2021 # DOUBLE DAGGER +0xE1 0x00B7 # MIDDLE DOT +0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK +0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xE4 0x2030 # PER MILLE SIGN +0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xF0 0xF8FF # Apple logo +0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH 
GRAVE +0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I +0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xF7 0x02DC # SMALL TILDE +0xF8 0x00AF # MACRON +0xF9 0x02D8 # BREVE +0xFA 0x02D9 # DOT ABOVE +0xFB 0x02DA # RING ABOVE +0xFC 0x00B8 # CEDILLA +0xFD 0x02DD # DOUBLE ACUTE ACCENT +0xFE 0x02DB # OGONEK +0xFF 0x02C7 # CARON diff --git a/sys/lib/antiword/UTF-8.txt b/sys/lib/antiword/UTF-8.txt new file mode 100755 index 000000000..46431f909 --- /dev/null +++ b/sys/lib/antiword/UTF-8.txt @@ -0,0 +1,3 @@ +# UTF-8 to Unicode +# This file is a dummy. +# The conversion is done algorithmicly, not by a table look-up. diff --git a/sys/lib/antiword/Unicode01 b/sys/lib/antiword/Unicode01 new file mode 100755 index 000000000..7e62eca56 --- /dev/null +++ b/sys/lib/antiword/Unicode01 @@ -0,0 +1,306 @@ +# +# Name: ISO/IEC 8859-1:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-1:1998 characters map into Unicode. 
+# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-1 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +#RISC OS +# This file was changed to match RISC OS specific characters (0x80 - 0x9f) +#RISC OS +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK 
+0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL 
LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0174 # CAPITAL W CIRCUMFLEX ACCENT +0x82 0x0175 # SMALL W CIRCUMFLEX ACCENT +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0176 # CAPITAL Y CIRCUMFLEX ACCENT +0x86 0x0177 # SMALL Y CIRCUMFLEX ACCENT +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x2026 # ELLIPSIS +0x8D 0x2122 # TRADEMARK +0x8E 0x2030 # PER MILLE SIGN +0x8F 0x2022 # BULLET +0x90 0x2018 # LEFT SINGLE QUOTE +0x91 0x2019 # RIGHT SINGLE QUOTE +0x92 0x2039 # LEFT SINGLE QUOTATION MARK +0x93 0x203A # RIGHT SINGLE QUOTATION MARK +0x94 0x201C # OPENING DOUBLE QUOTE +0x95 0x201D # CLOSING DOUBLE QUOTE +0x96 0x201E # LOW DOUBLE QUOTE +0x97 0x2013 # EN DASH +0x98 0x2014 # EM_DASH +0x99 0x2011 # NON BREAKING HYPHEN +0x9A 0x0152 # LATIN CAPITAL LIGATURE OE +0x9B 0x0153 # LATIN SMALL LIGATURE OE +0x9C 0x2020 # DAGGER +0x9D 0x2021 # DOUBLE DAGGER +0x9E 0xFB01 # Fi +0x9F 0xFB02 # Fl +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # 
BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O 
WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS 
diff --git a/sys/lib/antiword/Unicode15 b/sys/lib/antiword/Unicode15 new file mode 100755 index 000000000..5051a3d77 --- /dev/null +++ b/sys/lib/antiword/Unicode15 @@ -0,0 +1,305 @@ +# +# Name: ISO/IEC 8859-15:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn <mkuhn@acm.org> +# Ken Whistler <kenw@sybase.com> +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-15:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-15 order. +# +# Version history +# +# Updated versions of this file may be found in: +# <ftp://ftp.unicode.org/Public/MAPPINGS/> +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. 
+# +#RISC OS +# This file was changed to match RISC OS specific characters (0x80 - 0x9f) +#RISC OS +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B 
+0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y 
+0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # <control> +0x81 0x0174 # CAPITAL W CIRCUMFLEX ACCENT +0x82 0x0175 # SMALL W CIRCUMFLEX ACCENT +0x83 0x0083 # <control> +0x84 0x0084 # <control> +0x85 0x0176 # CAPITAL Y CIRCUMFLEX ACCENT +0x86 0x0177 # SMALL Y CIRCUMFLEX ACCENT +0x87 0x0087 # <control> +0x88 0x0088 # <control> +0x89 0x0089 # <control> +0x8A 0x008A # <control> +0x8B 0x008B # <control> +0x8C 0x2026 # ELLIPSIS +0x8D 0x2122 # TRADEMARK +0x8E 0x2030 # PER MILLE SIGN +0x8F 0x2022 # BULLET +0x90 0x2018 # LEFT SINGLE QUOTE +0x91 0x2019 # RIGHT SINGLE QUOTE +0x92 0x2039 # LEFT SINGLE QUOTATION MARK +0x93 0x203A # RIGHT SINGLE QUOTATION MARK +0x94 0x201C # OPENING DOUBLE QUOTE +0x95 0x201D # CLOSING DOUBLE QUOTE +0x96 0x201E # LOW DOUBLE QUOTE +0x97 0x2013 # EN DASH +0x98 0x2014 # EM_DASH +0x99 0x2011 # NON BREAKING HYPHEN +0x9A 0x009A # <control> +0x9B 0x009B # <control> +0x9C 0x2020 # DAGGER +0x9D 0x2021 # DOUBLE DAGGER +0x9E 0xFB01 # Fi +0x9F 0xFB02 # Fl +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x20AC # EURO SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # 
MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN 
SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/sys/lib/antiword/cp1250.txt b/sys/lib/antiword/cp1250.txt new file mode 100755 index 000000000..aa27ca388 --- /dev/null +++ b/sys/lib/antiword/cp1250.txt @@ -0,0 +1,274 @@ +#
+# Name: cp1250 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: cpxlate@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1250 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1250 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x20AC #EURO SIGN
+0x81 #UNDEFINED
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 #UNDEFINED
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 #UNDEFINED
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x015A #LATIN CAPITAL LETTER S WITH ACUTE
+0x8D 0x0164 #LATIN CAPITAL LETTER T WITH CARON
+0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
+0x8F 0x0179 #LATIN CAPITAL LETTER Z WITH ACUTE
+0x90 #UNDEFINED
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 #UNDEFINED
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x015B #LATIN SMALL LETTER S WITH ACUTE
+0x9D 0x0165 #LATIN SMALL LETTER T WITH CARON
+0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
+0x9F 0x017A #LATIN SMALL LETTER Z WITH ACUTE
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x02C7 #CARON
+0xA2 0x02D8 #BREVE
+0xA3 0x0141 #LATIN CAPITAL LETTER L WITH STROKE
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x0104 #LATIN CAPITAL LETTER A WITH OGONEK
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x00A8 #DIAERESIS
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x015E #LATIN CAPITAL LETTER S WITH CEDILLA
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x02DB #OGONEK
+0xB3 0x0142 #LATIN SMALL LETTER L WITH STROKE
+0xB4 0x00B4 #ACUTE ACCENT
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x00B8 #CEDILLA
+0xB9 0x0105 #LATIN SMALL LETTER A WITH OGONEK
+0xBA 0x015F #LATIN SMALL LETTER S WITH CEDILLA
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x013D #LATIN CAPITAL LETTER L WITH CARON
+0xBD 0x02DD #DOUBLE ACUTE ACCENT
+0xBE 0x013E #LATIN SMALL LETTER L WITH CARON
+0xBF 0x017C #LATIN SMALL LETTER Z WITH DOT ABOVE
+0xC0 0x0154 #LATIN CAPITAL LETTER R WITH ACUTE
+0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x0102 #LATIN CAPITAL LETTER A WITH BREVE
+0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x0139 #LATIN CAPITAL LETTER L WITH ACUTE
+0xC6 0x0106 #LATIN CAPITAL LETTER C WITH ACUTE
+0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x010C #LATIN CAPITAL LETTER C WITH CARON
+0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x0118 #LATIN CAPITAL LETTER E WITH OGONEK
+0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x011A #LATIN CAPITAL LETTER E WITH CARON
+0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x010E #LATIN CAPITAL LETTER D WITH CARON
+0xD0 0x0110 #LATIN CAPITAL LETTER D WITH STROKE
+0xD1 0x0143 #LATIN CAPITAL LETTER N WITH ACUTE
+0xD2 0x0147 #LATIN CAPITAL LETTER N WITH CARON
+0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 #MULTIPLICATION SIGN
+0xD8 0x0158 #LATIN CAPITAL LETTER R WITH CARON
+0xD9 0x016E #LATIN CAPITAL LETTER U WITH RING ABOVE
+0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x0162 #LATIN CAPITAL LETTER T WITH CEDILLA
+0xDF 0x00DF #LATIN SMALL LETTER SHARP S
+0xE0 0x0155 #LATIN SMALL LETTER R WITH ACUTE
+0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x0103 #LATIN SMALL LETTER A WITH BREVE
+0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x013A #LATIN SMALL LETTER L WITH ACUTE
+0xE6 0x0107 #LATIN SMALL LETTER C WITH ACUTE
+0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x010D #LATIN SMALL LETTER C WITH CARON
+0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x0119 #LATIN SMALL LETTER E WITH OGONEK
+0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x011B #LATIN SMALL LETTER E WITH CARON
+0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x010F #LATIN SMALL LETTER D WITH CARON
+0xF0 0x0111 #LATIN SMALL LETTER D WITH STROKE
+0xF1 0x0144 #LATIN SMALL LETTER N WITH ACUTE
+0xF2 0x0148 #LATIN SMALL LETTER N WITH CARON
+0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 #DIVISION SIGN
+0xF8 0x0159 #LATIN SMALL LETTER R WITH CARON
+0xF9 0x016F #LATIN SMALL LETTER U WITH RING ABOVE
+0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x0163 #LATIN SMALL LETTER T WITH CEDILLA
+0xFF 0x02D9 #DOT ABOVE
diff --git a/sys/lib/antiword/cp1251.txt b/sys/lib/antiword/cp1251.txt new file mode 100755 index 000000000..f6876e6d6 --- /dev/null +++ b/sys/lib/antiword/cp1251.txt @@ -0,0 +1,274 @@ +#
+# Name: cp1251 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: cpxlate@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1251 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1251 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x0402 #CYRILLIC CAPITAL LETTER DJE
+0x81 0x0403 #CYRILLIC CAPITAL LETTER GJE
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 0x0453 #CYRILLIC SMALL LETTER GJE
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 0x20AC #EURO SIGN
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0409 #CYRILLIC CAPITAL LETTER LJE
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x040A #CYRILLIC CAPITAL LETTER NJE
+0x8D 0x040C #CYRILLIC CAPITAL LETTER KJE
+0x8E 0x040B #CYRILLIC CAPITAL LETTER TSHE
+0x8F 0x040F #CYRILLIC CAPITAL LETTER DZHE
+0x90 0x0452 #CYRILLIC SMALL LETTER DJE
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 #UNDEFINED
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0459 #CYRILLIC SMALL LETTER LJE
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x045A #CYRILLIC SMALL LETTER NJE
+0x9D 0x045C #CYRILLIC SMALL LETTER KJE
+0x9E 0x045B #CYRILLIC SMALL LETTER TSHE
+0x9F 0x045F #CYRILLIC SMALL LETTER DZHE
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x040E #CYRILLIC CAPITAL LETTER SHORT U
+0xA2 0x045E #CYRILLIC SMALL LETTER SHORT U
+0xA3 0x0408 #CYRILLIC CAPITAL LETTER JE
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x0401 #CYRILLIC CAPITAL LETTER IO
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x0407 #CYRILLIC CAPITAL LETTER YI
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB3 0x0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB4 0x0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x0451 #CYRILLIC SMALL LETTER IO
+0xB9 0x2116 #NUMERO SIGN
+0xBA 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x0458 #CYRILLIC SMALL LETTER JE
+0xBD 0x0405 #CYRILLIC CAPITAL LETTER DZE
+0xBE 0x0455 #CYRILLIC SMALL LETTER DZE
+0xBF 0x0457 #CYRILLIC SMALL LETTER YI
+0xC0 0x0410 #CYRILLIC CAPITAL LETTER A
+0xC1 0x0411 #CYRILLIC CAPITAL LETTER BE
+0xC2 0x0412 #CYRILLIC CAPITAL LETTER VE
+0xC3 0x0413 #CYRILLIC CAPITAL LETTER GHE
+0xC4 0x0414 #CYRILLIC CAPITAL LETTER DE
+0xC5 0x0415 #CYRILLIC CAPITAL LETTER IE
+0xC6 0x0416 #CYRILLIC CAPITAL LETTER ZHE
+0xC7 0x0417 #CYRILLIC CAPITAL LETTER ZE
+0xC8 0x0418 #CYRILLIC CAPITAL LETTER I
+0xC9 0x0419 #CYRILLIC CAPITAL LETTER SHORT I
+0xCA 0x041A #CYRILLIC CAPITAL LETTER KA
+0xCB 0x041B #CYRILLIC CAPITAL LETTER EL
+0xCC 0x041C #CYRILLIC CAPITAL LETTER EM
+0xCD 0x041D #CYRILLIC CAPITAL LETTER EN
+0xCE 0x041E #CYRILLIC CAPITAL LETTER O
+0xCF 0x041F #CYRILLIC CAPITAL LETTER PE
+0xD0 0x0420 #CYRILLIC CAPITAL LETTER ER
+0xD1 0x0421 #CYRILLIC CAPITAL LETTER ES
+0xD2 0x0422 #CYRILLIC CAPITAL LETTER TE
+0xD3 0x0423 #CYRILLIC CAPITAL LETTER U
+0xD4 0x0424 #CYRILLIC CAPITAL LETTER EF
+0xD5 0x0425 #CYRILLIC CAPITAL LETTER HA
+0xD6 0x0426 #CYRILLIC CAPITAL LETTER TSE
+0xD7 0x0427 #CYRILLIC CAPITAL LETTER CHE
+0xD8 0x0428 #CYRILLIC CAPITAL LETTER SHA
+0xD9 0x0429 #CYRILLIC CAPITAL LETTER SHCHA
+0xDA 0x042A #CYRILLIC CAPITAL LETTER HARD SIGN
+0xDB 0x042B #CYRILLIC CAPITAL LETTER YERU
+0xDC 0x042C #CYRILLIC CAPITAL LETTER SOFT SIGN
+0xDD 0x042D #CYRILLIC CAPITAL LETTER E
+0xDE 0x042E #CYRILLIC CAPITAL LETTER YU
+0xDF 0x042F #CYRILLIC CAPITAL LETTER YA
+0xE0 0x0430 #CYRILLIC SMALL LETTER A
+0xE1 0x0431 #CYRILLIC SMALL LETTER BE
+0xE2 0x0432 #CYRILLIC SMALL LETTER VE
+0xE3 0x0433 #CYRILLIC SMALL LETTER GHE
+0xE4 0x0434 #CYRILLIC SMALL LETTER DE
+0xE5 0x0435 #CYRILLIC SMALL LETTER IE
+0xE6 0x0436 #CYRILLIC SMALL LETTER ZHE
+0xE7 0x0437 #CYRILLIC SMALL LETTER ZE
+0xE8 0x0438 #CYRILLIC SMALL LETTER I
+0xE9 0x0439 #CYRILLIC SMALL LETTER SHORT I
+0xEA 0x043A #CYRILLIC SMALL LETTER KA
+0xEB 0x043B #CYRILLIC SMALL LETTER EL
+0xEC 0x043C #CYRILLIC SMALL LETTER EM
+0xED 0x043D #CYRILLIC SMALL LETTER EN
+0xEE 0x043E #CYRILLIC SMALL LETTER O
+0xEF 0x043F #CYRILLIC SMALL LETTER PE
+0xF0 0x0440 #CYRILLIC SMALL LETTER ER
+0xF1 0x0441 #CYRILLIC SMALL LETTER ES
+0xF2 0x0442 #CYRILLIC SMALL LETTER TE
+0xF3 0x0443 #CYRILLIC SMALL LETTER U
+0xF4 0x0444 #CYRILLIC SMALL LETTER EF
+0xF5 0x0445 #CYRILLIC SMALL LETTER HA
+0xF6 0x0446 #CYRILLIC SMALL LETTER TSE
+0xF7 0x0447 #CYRILLIC SMALL LETTER CHE
+0xF8 0x0448 #CYRILLIC SMALL LETTER SHA
+0xF9 0x0449 #CYRILLIC SMALL LETTER SHCHA
+0xFA 0x044A #CYRILLIC SMALL LETTER HARD SIGN
+0xFB 0x044B #CYRILLIC SMALL LETTER YERU
+0xFC 0x044C #CYRILLIC SMALL LETTER SOFT SIGN
+0xFD 0x044D #CYRILLIC SMALL LETTER E
+0xFE 0x044E #CYRILLIC SMALL LETTER YU
+0xFF 0x044F #CYRILLIC SMALL LETTER YA
diff --git a/sys/lib/antiword/cp1252.txt b/sys/lib/antiword/cp1252.txt new file mode 100755 index 000000000..970002d90 --- /dev/null +++ b/sys/lib/antiword/cp1252.txt @@ -0,0 +1,274 @@ +#
+# Name: cp1252 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: cpxlate@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1252 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1252 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x20AC #EURO SIGN
+0x81 #UNDEFINED
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 0x02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x0152 #LATIN CAPITAL LIGATURE OE
+0x8D #UNDEFINED
+0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
+0x8F #UNDEFINED
+0x90 #UNDEFINED
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 0x02DC #SMALL TILDE
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x0153 #LATIN SMALL LIGATURE OE
+0x9D #UNDEFINED
+0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
+0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x00A1 #INVERTED EXCLAMATION MARK
+0xA2 0x00A2 #CENT SIGN
+0xA3 0x00A3 #POUND SIGN
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x00A5 #YEN SIGN
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x00A8 #DIAERESIS
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x00AA #FEMININE ORDINAL INDICATOR
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x00AF #MACRON
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x00B2 #SUPERSCRIPT TWO
+0xB3 0x00B3 #SUPERSCRIPT THREE
+0xB4 0x00B4 #ACUTE ACCENT
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x00B8 #CEDILLA
+0xB9 0x00B9 #SUPERSCRIPT ONE
+0xBA 0x00BA #MASCULINE ORDINAL INDICATOR
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x00BC #VULGAR FRACTION ONE QUARTER
+0xBD 0x00BD #VULGAR FRACTION ONE HALF
+0xBE 0x00BE #VULGAR FRACTION THREE QUARTERS
+0xBF 0x00BF #INVERTED QUESTION MARK
+0xC0 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE
+0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE
+0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6 0x00C6 #LATIN CAPITAL LETTER AE
+0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE
+0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE
+0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0 0x00D0 #LATIN CAPITAL LETTER ETH
+0xD1 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE
+0xD2 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE
+0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE
+0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 #MULTIPLICATION SIGN
+0xD8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
+0xD9 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE
+0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x00DE #LATIN CAPITAL LETTER THORN
+0xDF 0x00DF #LATIN SMALL LETTER SHARP S
+0xE0 0x00E0 #LATIN SMALL LETTER A WITH GRAVE
+0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x00E3 #LATIN SMALL LETTER A WITH TILDE
+0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE
+0xE6 0x00E6 #LATIN SMALL LETTER AE
+0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x00E8 #LATIN SMALL LETTER E WITH GRAVE
+0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x00EC #LATIN SMALL LETTER I WITH GRAVE
+0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS
+0xF0 0x00F0 #LATIN SMALL LETTER ETH
+0xF1 0x00F1 #LATIN SMALL LETTER N WITH TILDE
+0xF2 0x00F2 #LATIN SMALL LETTER O WITH GRAVE
+0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x00F5 #LATIN SMALL LETTER O WITH TILDE
+0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 #DIVISION SIGN
+0xF8 0x00F8 #LATIN SMALL LETTER O WITH STROKE
+0xF9 0x00F9 #LATIN SMALL LETTER U WITH GRAVE
+0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x00FE #LATIN SMALL LETTER THORN
+0xFF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/sys/lib/antiword/cp437.txt b/sys/lib/antiword/cp437.txt new file mode 100755 index 000000000..ae38e17ab --- /dev/null +++ b/sys/lib/antiword/cp437.txt @@ -0,0 +1,273 @@ +# +# Name: cp437_DOSLatinUS to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp437_DOSLatinUS code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp437_DOSLatinUS order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f 
#SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL 
LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x81 0x00fc #LATIN SMALL LETTER U WITH DIAERESIS +0x82 0x00e9 #LATIN SMALL LETTER E WITH ACUTE +0x83 0x00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x84 0x00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0x85 0x00e0 #LATIN SMALL LETTER A WITH GRAVE +0x86 0x00e5 #LATIN SMALL LETTER A WITH RING ABOVE +0x87 0x00e7 #LATIN SMALL LETTER C WITH CEDILLA +0x88 0x00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX +0x89 0x00eb #LATIN SMALL LETTER E WITH DIAERESIS +0x8a 0x00e8 #LATIN SMALL LETTER E WITH GRAVE +0x8b 0x00ef #LATIN SMALL LETTER I WITH DIAERESIS +0x8c 0x00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x8d 0x00ec #LATIN SMALL LETTER I WITH GRAVE +0x8e 0x00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x8f 0x00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0x90 0x00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x91 0x00e6 #LATIN SMALL LIGATURE AE +0x92 0x00c6 #LATIN CAPITAL LIGATURE AE +0x93 0x00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0x94 0x00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0x95 0x00f2 #LATIN SMALL LETTER O WITH GRAVE +0x96 0x00fb #LATIN SMALL LETTER U WITH CIRCUMFLEX +0x97 0x00f9 #LATIN SMALL LETTER U WITH GRAVE +0x98 0x00ff #LATIN SMALL LETTER Y WITH DIAERESIS +0x99 0x00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0x9a 0x00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b 0x00a2 #CENT SIGN +0x9c 0x00a3 #POUND SIGN 
+0x9d 0x00a5 #YEN SIGN +0x9e 0x20a7 #PESETA SIGN +0x9f 0x0192 #LATIN SMALL LETTER F WITH HOOK +0xa0 0x00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 0x00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 0x00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 0x00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 0x00f1 #LATIN SMALL LETTER N WITH TILDE +0xa5 0x00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xa6 0x00aa #FEMININE ORDINAL INDICATOR +0xa7 0x00ba #MASCULINE ORDINAL INDICATOR +0xa8 0x00bf #INVERTED QUESTION MARK +0xa9 0x2310 #REVERSED NOT SIGN +0xaa 0x00ac #NOT SIGN +0xab 0x00bd #VULGAR FRACTION ONE HALF +0xac 0x00bc #VULGAR FRACTION ONE QUARTER +0xad 0x00a1 #INVERTED EXCLAMATION MARK +0xae 0x00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf 0x00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 
0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x258c #LEFT HALF BLOCK +0xde 0x2590 #RIGHT HALF BLOCK +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x03b1 #GREEK SMALL LETTER ALPHA +0xe1 0x00df #LATIN SMALL LETTER SHARP S +0xe2 0x0393 #GREEK CAPITAL LETTER GAMMA +0xe3 0x03c0 #GREEK SMALL LETTER PI +0xe4 0x03a3 #GREEK CAPITAL LETTER SIGMA +0xe5 0x03c3 #GREEK SMALL LETTER SIGMA +0xe6 0x00b5 #MICRO SIGN +0xe7 0x03c4 #GREEK SMALL LETTER TAU +0xe8 0x03a6 #GREEK CAPITAL LETTER PHI +0xe9 0x0398 #GREEK CAPITAL LETTER THETA +0xea 0x03a9 #GREEK CAPITAL LETTER OMEGA +0xeb 0x03b4 #GREEK SMALL LETTER DELTA +0xec 0x221e #INFINITY +0xed 0x03c6 #GREEK SMALL LETTER PHI +0xee 0x03b5 #GREEK SMALL LETTER EPSILON +0xef 0x2229 #INTERSECTION +0xf0 0x2261 #IDENTICAL TO +0xf1 0x00b1 #PLUS-MINUS SIGN +0xf2 0x2265 #GREATER-THAN OR EQUAL TO +0xf3 0x2264 #LESS-THAN OR EQUAL TO +0xf4 0x2320 #TOP HALF INTEGRAL +0xf5 0x2321 #BOTTOM HALF INTEGRAL +0xf6 0x00f7 #DIVISION SIGN +0xf7 0x2248 #ALMOST EQUAL TO +0xf8 0x00b0 
#DEGREE SIGN +0xf9 0x2219 #BULLET OPERATOR +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x221a #SQUARE ROOT +0xfc 0x207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd 0x00b2 #SUPERSCRIPT TWO +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE diff --git a/sys/lib/antiword/cp850.txt b/sys/lib/antiword/cp850.txt new file mode 100755 index 000000000..590b1afe5 --- /dev/null +++ b/sys/lib/antiword/cp850.txt @@ -0,0 +1,273 @@ +# +# Name: cp850_DOSLatin1 to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp850_DOSLatin1 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp850_DOSLatin1 order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 
#AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 
0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x81 0x00fc #LATIN SMALL LETTER U WITH DIAERESIS +0x82 0x00e9 #LATIN SMALL LETTER E WITH ACUTE +0x83 0x00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x84 0x00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0x85 0x00e0 #LATIN SMALL LETTER A WITH GRAVE +0x86 0x00e5 #LATIN SMALL LETTER A WITH RING ABOVE +0x87 0x00e7 #LATIN SMALL LETTER C WITH CEDILLA +0x88 0x00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX +0x89 0x00eb #LATIN SMALL LETTER E WITH DIAERESIS +0x8a 0x00e8 #LATIN SMALL LETTER E WITH GRAVE +0x8b 0x00ef #LATIN SMALL LETTER I WITH DIAERESIS +0x8c 0x00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x8d 0x00ec #LATIN SMALL LETTER I WITH GRAVE +0x8e 0x00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x8f 0x00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0x90 0x00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x91 0x00e6 #LATIN SMALL LIGATURE AE +0x92 0x00c6 #LATIN CAPITAL LIGATURE AE +0x93 0x00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0x94 0x00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0x95 0x00f2 #LATIN SMALL LETTER O WITH GRAVE +0x96 0x00fb #LATIN SMALL LETTER U WITH CIRCUMFLEX +0x97 0x00f9 #LATIN 
SMALL LETTER U WITH GRAVE +0x98 0x00ff #LATIN SMALL LETTER Y WITH DIAERESIS +0x99 0x00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0x9a 0x00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b 0x00f8 #LATIN SMALL LETTER O WITH STROKE +0x9c 0x00a3 #POUND SIGN +0x9d 0x00d8 #LATIN CAPITAL LETTER O WITH STROKE +0x9e 0x00d7 #MULTIPLICATION SIGN +0x9f 0x0192 #LATIN SMALL LETTER F WITH HOOK +0xa0 0x00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 0x00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 0x00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 0x00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 0x00f1 #LATIN SMALL LETTER N WITH TILDE +0xa5 0x00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xa6 0x00aa #FEMININE ORDINAL INDICATOR +0xa7 0x00ba #MASCULINE ORDINAL INDICATOR +0xa8 0x00bf #INVERTED QUESTION MARK +0xa9 0x00ae #REGISTERED SIGN +0xaa 0x00ac #NOT SIGN +0xab 0x00bd #VULGAR FRACTION ONE HALF +0xac 0x00bc #VULGAR FRACTION ONE QUARTER +0xad 0x00a1 #INVERTED EXCLAMATION MARK +0xae 0x00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf 0x00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb6 0x00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xb7 0x00c0 #LATIN CAPITAL LETTER A WITH GRAVE +0xb8 0x00a9 #COPYRIGHT SIGN +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x00a2 #CENT SIGN +0xbe 0x00a5 #YEN SIGN +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 
+0xc6 0x00e3 #LATIN SMALL LETTER A WITH TILDE +0xc7 0x00c3 #LATIN CAPITAL LETTER A WITH TILDE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x00a4 #CURRENCY SIGN +0xd0 0x00f0 #LATIN SMALL LETTER ETH +0xd1 0x00d0 #LATIN CAPITAL LETTER ETH +0xd2 0x00ca #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xd3 0x00cb #LATIN CAPITAL LETTER E WITH DIAERESIS +0xd4 0x00c8 #LATIN CAPITAL LETTER E WITH GRAVE +0xd5 0x0131 #LATIN SMALL LETTER DOTLESS I +0xd6 0x00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd7 0x00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xd8 0x00cf #LATIN CAPITAL LETTER I WITH DIAERESIS +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x00a6 #BROKEN BAR +0xde 0x00cc #LATIN CAPITAL LETTER I WITH GRAVE +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe1 0x00df #LATIN SMALL LETTER SHARP S +0xe2 0x00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xe3 0x00d2 #LATIN CAPITAL LETTER O WITH GRAVE +0xe4 0x00f5 #LATIN SMALL LETTER O WITH TILDE +0xe5 0x00d5 #LATIN CAPITAL LETTER O WITH TILDE +0xe6 0x00b5 #MICRO SIGN +0xe7 0x00fe #LATIN SMALL LETTER THORN +0xe8 0x00de #LATIN CAPITAL LETTER THORN +0xe9 0x00da #LATIN CAPITAL LETTER U WITH ACUTE +0xea 0x00db #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xeb 0x00d9 #LATIN CAPITAL LETTER U WITH GRAVE +0xec 0x00fd #LATIN SMALL LETTER Y WITH ACUTE +0xed 0x00dd #LATIN CAPITAL LETTER Y WITH ACUTE +0xee 0x00af #MACRON +0xef 0x00b4 #ACUTE ACCENT +0xf0 0x00ad #SOFT HYPHEN +0xf1 0x00b1 #PLUS-MINUS SIGN +0xf2 0x2017 #DOUBLE LOW LINE +0xf3 0x00be #VULGAR FRACTION THREE QUARTERS +0xf4 0x00b6 #PILCROW SIGN 
+0xf5 0x00a7 #SECTION SIGN +0xf6 0x00f7 #DIVISION SIGN +0xf7 0x00b8 #CEDILLA +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x00a8 #DIAERESIS +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x00b9 #SUPERSCRIPT ONE +0xfc 0x00b3 #SUPERSCRIPT THREE +0xfd 0x00b2 #SUPERSCRIPT TWO +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE diff --git a/sys/lib/antiword/cp852.txt b/sys/lib/antiword/cp852.txt new file mode 100755 index 000000000..2f2dabaeb --- /dev/null +++ b/sys/lib/antiword/cp852.txt @@ -0,0 +1,273 @@ +# +# Name: cp852_DOSLatin2 to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp852_DOSLatin2 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp852_DOSLatin2 order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN 
+0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 
0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x81 0x00fc #LATIN SMALL LETTER U WITH DIAERESIS +0x82 0x00e9 #LATIN SMALL LETTER E WITH ACUTE +0x83 0x00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x84 0x00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0x85 0x016f #LATIN SMALL LETTER U WITH RING ABOVE +0x86 0x0107 #LATIN SMALL LETTER C WITH ACUTE +0x87 0x00e7 #LATIN SMALL LETTER C WITH CEDILLA +0x88 0x0142 #LATIN SMALL LETTER L WITH STROKE +0x89 0x00eb #LATIN SMALL LETTER E WITH DIAERESIS +0x8a 0x0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0x8b 0x0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE +0x8c 0x00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x8d 0x0179 #LATIN CAPITAL LETTER Z WITH ACUTE +0x8e 0x00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x8f 0x0106 #LATIN CAPITAL LETTER C WITH ACUTE +0x90 0x00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x91 0x0139 #LATIN CAPITAL LETTER L WITH ACUTE +0x92 0x013a #LATIN SMALL LETTER L WITH ACUTE +0x93 0x00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0x94 0x00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0x95 0x013d #LATIN 
CAPITAL LETTER L WITH CARON +0x96 0x013e #LATIN SMALL LETTER L WITH CARON +0x97 0x015a #LATIN CAPITAL LETTER S WITH ACUTE +0x98 0x015b #LATIN SMALL LETTER S WITH ACUTE +0x99 0x00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0x9a 0x00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b 0x0164 #LATIN CAPITAL LETTER T WITH CARON +0x9c 0x0165 #LATIN SMALL LETTER T WITH CARON +0x9d 0x0141 #LATIN CAPITAL LETTER L WITH STROKE +0x9e 0x00d7 #MULTIPLICATION SIGN +0x9f 0x010d #LATIN SMALL LETTER C WITH CARON +0xa0 0x00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 0x00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 0x00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 0x00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 0x0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xa5 0x0105 #LATIN SMALL LETTER A WITH OGONEK +0xa6 0x017d #LATIN CAPITAL LETTER Z WITH CARON +0xa7 0x017e #LATIN SMALL LETTER Z WITH CARON +0xa8 0x0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xa9 0x0119 #LATIN SMALL LETTER E WITH OGONEK +0xaa 0x00ac #NOT SIGN +0xab 0x017a #LATIN SMALL LETTER Z WITH ACUTE +0xac 0x010c #LATIN CAPITAL LETTER C WITH CARON +0xad 0x015f #LATIN SMALL LETTER S WITH CEDILLA +0xae 0x00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf 0x00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb6 0x00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xb7 0x011a #LATIN CAPITAL LETTER E WITH CARON +0xb8 0x015e #LATIN CAPITAL LETTER S WITH CEDILLA +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x017b #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xbe 0x017c #LATIN SMALL LETTER Z WITH DOT ABOVE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT 
+0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x0102 #LATIN CAPITAL LETTER A WITH BREVE +0xc7 0x0103 #LATIN SMALL LETTER A WITH BREVE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x00a4 #CURRENCY SIGN +0xd0 0x0111 #LATIN SMALL LETTER D WITH STROKE +0xd1 0x0110 #LATIN CAPITAL LETTER D WITH STROKE +0xd2 0x010e #LATIN CAPITAL LETTER D WITH CARON +0xd3 0x00cb #LATIN CAPITAL LETTER E WITH DIAERESIS +0xd4 0x010f #LATIN SMALL LETTER D WITH CARON +0xd5 0x0147 #LATIN CAPITAL LETTER N WITH CARON +0xd6 0x00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd7 0x00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xd8 0x011b #LATIN SMALL LETTER E WITH CARON +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x0162 #LATIN CAPITAL LETTER T WITH CEDILLA +0xde 0x016e #LATIN CAPITAL LETTER U WITH RING ABOVE +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe1 0x00df #LATIN SMALL LETTER SHARP S +0xe2 0x00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xe3 0x0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xe4 0x0144 #LATIN SMALL LETTER N WITH ACUTE +0xe5 0x0148 #LATIN SMALL LETTER N WITH CARON +0xe6 0x0160 #LATIN CAPITAL LETTER S WITH CARON +0xe7 0x0161 #LATIN SMALL LETTER S WITH CARON +0xe8 0x0154 #LATIN CAPITAL LETTER R WITH ACUTE +0xe9 0x00da #LATIN CAPITAL LETTER U WITH ACUTE +0xea 0x0155 #LATIN SMALL LETTER R WITH ACUTE +0xeb 0x0170 #LATIN 
CAPITAL LETTER U WITH DOUBLE ACUTE +0xec 0x00fd #LATIN SMALL LETTER Y WITH ACUTE +0xed 0x00dd #LATIN CAPITAL LETTER Y WITH ACUTE +0xee 0x0163 #LATIN SMALL LETTER T WITH CEDILLA +0xef 0x00b4 #ACUTE ACCENT +0xf0 0x00ad #SOFT HYPHEN +0xf1 0x02dd #DOUBLE ACUTE ACCENT +0xf2 0x02db #OGONEK +0xf3 0x02c7 #CARON +0xf4 0x02d8 #BREVE +0xf5 0x00a7 #SECTION SIGN +0xf6 0x00f7 #DIVISION SIGN +0xf7 0x00b8 #CEDILLA +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x00a8 #DIAERESIS +0xfa 0x02d9 #DOT ABOVE +0xfb 0x0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xfc 0x0158 #LATIN CAPITAL LETTER R WITH CARON +0xfd 0x0159 #LATIN SMALL LETTER R WITH CARON +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE diff --git a/sys/lib/antiword/cp862.txt b/sys/lib/antiword/cp862.txt new file mode 100755 index 000000000..e2a4f47f7 --- /dev/null +++ b/sys/lib/antiword/cp862.txt @@ -0,0 +1,273 @@ +# +# Name: cp862_DOSHebrew to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp862_DOSHebrew code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp862_DOSHebrew order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e 
#GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 
0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x05d0 #HEBREW LETTER ALEF +0x81 0x05d1 #HEBREW LETTER BET +0x82 0x05d2 #HEBREW LETTER GIMEL +0x83 0x05d3 #HEBREW LETTER DALET +0x84 0x05d4 #HEBREW LETTER HE +0x85 0x05d5 #HEBREW LETTER VAV +0x86 0x05d6 #HEBREW LETTER ZAYIN +0x87 0x05d7 #HEBREW LETTER HET +0x88 0x05d8 #HEBREW LETTER TET +0x89 0x05d9 #HEBREW LETTER YOD +0x8a 0x05da #HEBREW LETTER FINAL KAF +0x8b 0x05db #HEBREW LETTER KAF +0x8c 0x05dc #HEBREW LETTER LAMED +0x8d 0x05dd #HEBREW LETTER FINAL MEM +0x8e 0x05de #HEBREW LETTER MEM +0x8f 0x05df #HEBREW LETTER FINAL NUN +0x90 0x05e0 #HEBREW LETTER NUN +0x91 0x05e1 #HEBREW LETTER SAMEKH +0x92 0x05e2 #HEBREW LETTER AYIN +0x93 0x05e3 #HEBREW LETTER FINAL PE +0x94 0x05e4 #HEBREW LETTER PE +0x95 0x05e5 #HEBREW LETTER FINAL TSADI +0x96 0x05e6 #HEBREW LETTER TSADI +0x97 0x05e7 #HEBREW LETTER QOF +0x98 0x05e8 #HEBREW LETTER RESH +0x99 0x05e9 #HEBREW LETTER SHIN +0x9a 0x05ea #HEBREW LETTER TAV +0x9b 0x00a2 #CENT SIGN +0x9c 0x00a3 #POUND SIGN +0x9d 0x00a5 #YEN SIGN +0x9e 0x20a7 #PESETA SIGN +0x9f 0x0192 #LATIN SMALL LETTER F WITH HOOK +0xa0 0x00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 0x00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 0x00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 0x00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 0x00f1 #LATIN SMALL LETTER N WITH TILDE +0xa5 0x00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xa6 0x00aa #FEMININE ORDINAL INDICATOR +0xa7 0x00ba #MASCULINE ORDINAL INDICATOR +0xa8 0x00bf #INVERTED QUESTION MARK +0xa9 0x2310 #REVERSED NOT SIGN +0xaa 0x00ac #NOT SIGN +0xab 0x00bd #VULGAR FRACTION ONE HALF +0xac 0x00bc #VULGAR FRACTION ONE QUARTER +0xad 0x00a1 #INVERTED EXCLAMATION MARK +0xae 0x00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf 0x00bb #RIGHT-POINTING DOUBLE ANGLE 
QUOTATION MARK +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 
+0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x258c #LEFT HALF BLOCK +0xde 0x2590 #RIGHT HALF BLOCK +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x03b1 #GREEK SMALL LETTER ALPHA +0xe1 0x00df #LATIN SMALL LETTER SHARP S (GERMAN) +0xe2 0x0393 #GREEK CAPITAL LETTER GAMMA +0xe3 0x03c0 #GREEK SMALL LETTER PI +0xe4 0x03a3 #GREEK CAPITAL LETTER SIGMA +0xe5 0x03c3 #GREEK SMALL LETTER SIGMA +0xe6 0x00b5 #MICRO SIGN +0xe7 0x03c4 #GREEK SMALL LETTER TAU +0xe8 0x03a6 #GREEK CAPITAL LETTER PHI +0xe9 0x0398 #GREEK CAPITAL LETTER THETA +0xea 0x03a9 #GREEK CAPITAL LETTER OMEGA +0xeb 0x03b4 #GREEK SMALL LETTER DELTA +0xec 0x221e #INFINITY +0xed 0x03c6 #GREEK SMALL LETTER PHI +0xee 0x03b5 #GREEK SMALL LETTER EPSILON +0xef 0x2229 #INTERSECTION +0xf0 0x2261 #IDENTICAL TO +0xf1 0x00b1 #PLUS-MINUS SIGN +0xf2 0x2265 #GREATER-THAN OR EQUAL TO +0xf3 0x2264 #LESS-THAN OR EQUAL TO +0xf4 0x2320 #TOP HALF INTEGRAL +0xf5 0x2321 #BOTTOM HALF INTEGRAL +0xf6 0x00f7 #DIVISION SIGN +0xf7 0x2248 #ALMOST EQUAL TO +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x2219 #BULLET OPERATOR +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x221a #SQUARE ROOT +0xfc 0x207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd 0x00b2 #SUPERSCRIPT TWO +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE diff --git a/sys/lib/antiword/cp866.txt b/sys/lib/antiword/cp866.txt new file mode 100755 index 000000000..94e03b080 --- /dev/null +++ b/sys/lib/antiword/cp866.txt @@ -0,0 +1,273 @@ +# +# Name: cp866_DOSCyrillicRussian to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp866_DOSCyrillicRussian code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp866_DOSCyrillicRussian order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 
0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V 
+0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x0410 #CYRILLIC CAPITAL LETTER A +0x81 0x0411 #CYRILLIC CAPITAL LETTER BE +0x82 0x0412 #CYRILLIC CAPITAL LETTER VE +0x83 0x0413 #CYRILLIC CAPITAL LETTER GHE +0x84 0x0414 #CYRILLIC CAPITAL LETTER DE +0x85 0x0415 #CYRILLIC CAPITAL LETTER IE +0x86 0x0416 #CYRILLIC CAPITAL LETTER ZHE +0x87 0x0417 #CYRILLIC CAPITAL LETTER ZE +0x88 0x0418 #CYRILLIC CAPITAL LETTER I +0x89 0x0419 #CYRILLIC CAPITAL LETTER SHORT I +0x8a 0x041a #CYRILLIC CAPITAL LETTER KA +0x8b 0x041b #CYRILLIC CAPITAL LETTER EL +0x8c 0x041c #CYRILLIC CAPITAL LETTER EM +0x8d 0x041d #CYRILLIC CAPITAL LETTER EN +0x8e 0x041e #CYRILLIC CAPITAL LETTER O +0x8f 0x041f #CYRILLIC CAPITAL LETTER PE +0x90 0x0420 #CYRILLIC CAPITAL LETTER ER +0x91 0x0421 #CYRILLIC CAPITAL LETTER ES +0x92 0x0422 #CYRILLIC CAPITAL LETTER TE +0x93 0x0423 #CYRILLIC CAPITAL LETTER U +0x94 0x0424 #CYRILLIC CAPITAL LETTER EF +0x95 0x0425 #CYRILLIC CAPITAL LETTER HA +0x96 0x0426 #CYRILLIC CAPITAL LETTER TSE +0x97 0x0427 #CYRILLIC CAPITAL LETTER CHE +0x98 0x0428 #CYRILLIC CAPITAL LETTER SHA +0x99 0x0429 #CYRILLIC CAPITAL LETTER SHCHA +0x9a 0x042a #CYRILLIC CAPITAL LETTER HARD SIGN +0x9b 0x042b #CYRILLIC CAPITAL LETTER YERU +0x9c 0x042c #CYRILLIC CAPITAL LETTER SOFT SIGN +0x9d 0x042d #CYRILLIC CAPITAL LETTER E +0x9e 0x042e #CYRILLIC CAPITAL LETTER YU +0x9f 0x042f #CYRILLIC CAPITAL LETTER YA +0xa0 0x0430 #CYRILLIC SMALL LETTER A +0xa1 0x0431 #CYRILLIC SMALL LETTER BE +0xa2 0x0432 #CYRILLIC SMALL LETTER VE +0xa3 0x0433 #CYRILLIC SMALL LETTER GHE +0xa4 0x0434 #CYRILLIC SMALL LETTER DE +0xa5 0x0435 #CYRILLIC SMALL LETTER IE +0xa6 0x0436 #CYRILLIC SMALL LETTER ZHE +0xa7 0x0437 #CYRILLIC SMALL LETTER ZE +0xa8 0x0438 #CYRILLIC SMALL LETTER I +0xa9 0x0439 #CYRILLIC SMALL LETTER 
SHORT I +0xaa 0x043a #CYRILLIC SMALL LETTER KA +0xab 0x043b #CYRILLIC SMALL LETTER EL +0xac 0x043c #CYRILLIC SMALL LETTER EM +0xad 0x043d #CYRILLIC SMALL LETTER EN +0xae 0x043e #CYRILLIC SMALL LETTER O +0xaf 0x043f #CYRILLIC SMALL LETTER PE +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL 
SINGLE +0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x258c #LEFT HALF BLOCK +0xde 0x2590 #RIGHT HALF BLOCK +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x0440 #CYRILLIC SMALL LETTER ER +0xe1 0x0441 #CYRILLIC SMALL LETTER ES +0xe2 0x0442 #CYRILLIC SMALL LETTER TE +0xe3 0x0443 #CYRILLIC SMALL LETTER U +0xe4 0x0444 #CYRILLIC SMALL LETTER EF +0xe5 0x0445 #CYRILLIC SMALL LETTER HA +0xe6 0x0446 #CYRILLIC SMALL LETTER TSE +0xe7 0x0447 #CYRILLIC SMALL LETTER CHE +0xe8 0x0448 #CYRILLIC SMALL LETTER SHA +0xe9 0x0449 #CYRILLIC SMALL LETTER SHCHA +0xea 0x044a #CYRILLIC SMALL LETTER HARD SIGN +0xeb 0x044b #CYRILLIC SMALL LETTER YERU +0xec 0x044c #CYRILLIC SMALL LETTER SOFT SIGN +0xed 0x044d #CYRILLIC SMALL LETTER E +0xee 0x044e #CYRILLIC SMALL LETTER YU +0xef 0x044f #CYRILLIC SMALL LETTER YA +0xf0 0x0401 #CYRILLIC CAPITAL LETTER IO +0xf1 0x0451 #CYRILLIC SMALL LETTER IO +0xf2 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xf3 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xf4 0x0407 #CYRILLIC CAPITAL LETTER YI +0xf5 0x0457 #CYRILLIC SMALL LETTER YI +0xf6 0x040e #CYRILLIC CAPITAL LETTER SHORT U +0xf7 0x045e #CYRILLIC SMALL LETTER SHORT U +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x2219 #BULLET OPERATOR +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x221a #SQUARE ROOT +0xfc 0x2116 #NUMERO SIGN +0xfd 0x00a4 #CURRENCY SIGN +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE diff --git a/sys/lib/antiword/fontnames b/sys/lib/antiword/fontnames new file mode 100755 index 000000000..bb710d2b6 --- /dev/null +++ b/sys/lib/antiword/fontnames @@ -0,0 +1,117 @@ +# 
Default fontnames translation table +# uses only Standard PostScript (TM) fonts +# +# MS-Word fontname, Italic, Bold, PostScript fontname, Special +Arial, 0, 0, Helvetica, 0 +Arial, 0, 1, Helvetica-Bold, 0 +Arial, 1, 0, Helvetica-Oblique, 0 +Arial, 1, 1, Helvetica-BoldOblique, 0 +Arial Black, 0, 0, Helvetica, 0 +Arial Black, 0, 1, Helvetica-Bold, 0 +Arial Black, 1, 0, Helvetica-Oblique, 0 +Arial Black, 1, 1, Helvetica-BoldOblique, 0 +Arial CE, 0, 0, Helvetica, 0 +Arial CE, 0, 1, Helvetica-Bold, 0 +Arial CE, 1, 0, Helvetica-Oblique, 0 +Arial CE, 1, 1, Helvetica-BoldOblique, 0 +Arial Narrow, 0, 0, Helvetica-Narrow, 0 +Arial Narrow, 0, 1, Helvetica-Narrow-Bold, 0 +Arial Narrow, 1, 0, Helvetica-Narrow-Oblique, 0 +Arial Narrow, 1, 1, Helvetica-Narrow-BoldOblique, 0 +AvantGarde, 0, 0, AvantGarde-Book, 0 +AvantGarde, 0, 1, AvantGarde-Demi, 0 +AvantGarde, 1, 0, AvantGarde-BookOblique, 0 +AvantGarde, 1, 1, AvantGarde-DemiOblique, 0 +Bookman Old Style, 0, 0, Bookman-Light, 0 +Bookman Old Style, 0, 1, Bookman-Demi, 0 +Bookman Old Style, 1, 0, Bookman-LightItalic, 0 +Bookman Old Style, 1, 1, Bookman-DemiItalic, 0 +Century Schoolbook, 0, 0, NewCenturySchlbk-Roman, 0 +Century Schoolbook, 0, 1, NewCenturySchlbk-Bold, 0 +Century Schoolbook, 1, 0, NewCenturySchlbk-Italic, 0 +Century Schoolbook, 1, 1, NewCenturySchlbk-BoldItalic, 0 +CG Omega, 0, 0, Helvetica, 0 +CG Omega, 0, 1, Helvetica-Bold, 0 +CG Omega, 1, 0, Helvetica-Oblique, 0 +CG Omega, 1, 1, Helvetica-BoldOblique, 0 +Comic Sans MS, 0, 0, Helvetica, 0 +Comic Sans MS, 0, 1, Helvetica-Bold, 0 +Comic Sans MS, 1, 0, Helvetica-Oblique, 0 +Comic Sans MS, 1, 1, Helvetica-BoldOblique, 0 +Courier, 0, 0, Courier, 0 +Courier, 0, 1, Courier-Bold, 0 +Courier, 1, 0, Courier-Oblique, 0 +Courier, 1, 1, Courier-BoldOblique, 0 +Courier New, 0, 0, Courier, 0 +Courier New, 0, 1, Courier-Bold, 0 +Courier New, 1, 0, Courier-Oblique, 0 +Courier New, 1, 1, Courier-BoldOblique, 0 +Fixedsys, 0, 0, Courier, 0 +Fixedsys, 0, 1, Courier-Bold, 0 +Fixedsys, 
1, 0, Courier-Oblique, 0 +Fixedsys, 1, 1, Courier-BoldOblique, 0 +Helvetica, 0, 0, Helvetica, 0 +Helvetica, 0, 1, Helvetica-Bold, 0 +Helvetica, 1, 0, Helvetica-Oblique, 0 +Helvetica, 1, 1, Helvetica-BoldOblique, 0 +Helvetica-Narrow, 0, 0, Helvetica-Narrow, 0 +Helvetica-Narrow, 0, 1, Helvetica-Narrow-Bold, 0 +Helvetica-Narrow, 1, 0, Helvetica-Narrow-Oblique, 0 +Helvetica-Narrow, 1, 1, Helvetica-Narrow-BoldOblique, 0 +ITC Bookman, 0, 0, Bookman-Light, 0 +ITC Bookman, 0, 1, Bookman-Demi, 0 +ITC Bookman, 1, 0, Bookman-LightItalic, 0 +ITC Bookman, 1, 1, Bookman-DemiItalic, 0 +Lucida Console, 0, 0, Courier, 0 +Lucida Console, 0, 1, Courier-Bold, 0 +Lucida Console, 1, 0, Courier-Oblique, 0 +Lucida Console, 1, 1, Courier-BoldOblique, 0 +Lucida Sans Typewriter, 0, 0, Courier, 0 +Lucida Sans Typewriter, 0, 1, Courier-Bold, 0 +Lucida Sans Typewriter, 1, 0, Courier-Oblique, 0 +Lucida Sans Typewriter, 1, 1, Courier-BoldOblique, 0 +Monotype.com, 0, 0, Courier, 0 +Monotype.com, 0, 1, Courier-Bold, 0 +Monotype.com, 1, 0, Courier-Oblique, 0 +Monotype.com, 1, 1, Courier-BoldOblique, 0 +MS Sans Serif, 0, 0, Helvetica, 0 +MS Sans Serif, 0, 1, Helvetica-Bold, 0 +MS Sans Serif, 1, 0, Helvetica-Oblique, 0 +MS Sans Serif, 1, 1, Helvetica-BoldOblique, 0 +New Century Schlbk, 0, 0, NewCenturySchlbk-Roman, 0 +New Century Schlbk, 0, 1, NewCenturySchlbk-Bold, 0 +New Century Schlbk, 1, 0, NewCenturySchlbk-Italic, 0 +New Century Schlbk, 1, 1, NewCenturySchlbk-BoldItalic, 0 +NewCenturySchlbk, 0, 0, NewCenturySchlbk-Roman, 0 +NewCenturySchlbk, 0, 1, NewCenturySchlbk-Bold, 0 +NewCenturySchlbk, 1, 0, NewCenturySchlbk-Italic, 0 +NewCenturySchlbk, 1, 1, NewCenturySchlbk-BoldItalic, 0 +Palatino, 0, 0, Palatino-Roman, 0 +Palatino, 0, 1, Palatino-Bold, 0 +Palatino, 1, 0, Palatino-Italic, 0 +Palatino, 1, 1, Palatino-BoldItalic, 0 +Swiss, 0, 0, Helvetica, 0 +Swiss, 0, 1, Helvetica-Bold, 0 +Swiss, 1, 0, Helvetica-Oblique, 0 +Swiss, 1, 1, Helvetica-BoldOblique, 0 +Tahoma, 0, 0, Helvetica, 0 +Tahoma, 0, 1, 
Helvetica-Bold, 0 +Tahoma, 1, 0, Helvetica-Oblique, 0 +Tahoma, 1, 1, Helvetica-BoldOblique, 0 +Trebuchet MS, 0, 0, Helvetica, 0 +Trebuchet MS, 0, 1, Helvetica-Bold, 0 +Trebuchet MS, 1, 0, Helvetica-Oblique, 0 +Trebuchet MS, 1, 1, Helvetica-BoldOblique, 0 +Univers, 0, 0, Helvetica, 0 +Univers, 0, 1, Helvetica-Bold, 0 +Univers, 1, 0, Helvetica-Oblique, 0 +Univers, 1, 1, Helvetica-BoldOblique, 0 +Verdana, 0, 0, Helvetica, 0 +Verdana, 0, 1, Helvetica-Bold, 0 +Verdana, 1, 0, Helvetica-Oblique, 0 +Verdana, 1, 1, Helvetica-BoldOblique, 0 +# All the other fonts +*, 0, 0, Times-Roman, 0 +*, 0, 1, Times-Bold, 0 +*, 1, 0, Times-Italic, 0 +*, 1, 1, Times-BoldItalic, 0 diff --git a/sys/lib/antiword/fontnames.russian b/sys/lib/antiword/fontnames.russian new file mode 100755 index 000000000..fb21c56db --- /dev/null +++ b/sys/lib/antiword/fontnames.russian @@ -0,0 +1,43 @@ +# Default fontnames translation table +# for Cyrillic +# +# by: Dmitry Chernyak <cdl@inkasbank.ru> +# +# MS-Word fontname, Italic, Bold, PostScript fontname, Special +Arial, 0, 0, ArialCyrMT, 0 +Arial, 0, 1, ArialCyrMT-Bold, 0 +Arial, 1, 0, ArialCyrMT-Italic, 0 +Arial, 1, 1, ArialCyrMT-BoldItalic, 0 +Courier, 0, 0, CourierCyrPS, 0 +Courier, 0, 1, CourierCyrPS-Bold, 0 +Courier, 1, 0, CourierCyrPS-Inclined, 0 +Courier, 1, 1, CourierCyrPS-BoldInclined, 0 +Courier New, 0, 0, CourierCyrPS, 0 +Courier New, 0, 1, CourierCyrPS-Bold, 0 +Courier New, 1, 0, CourierCyrPS-Inclined, 0 +Courier New, 1, 1, CourierCyrPS-BoldInclined, 0 +Fixedsys, 0, 0, CourierCyrPS, 0 +Fixedsys, 0, 1, CourierCyrPS-Bold, 0 +Fixedsys, 1, 0, CourierCyrPS-Inclined, 0 +Fixedsys, 1, 1, CourierCyrPS-BoldInclined, 0 +Helvetica, 0, 0, ArialCyrMT, 0 +Helvetica, 0, 1, ArialCyrMT-Bold, 0 +Helvetica, 1, 0, ArialCyrMT-Italic, 0 +Helvetica, 1, 1, ArialCyrMT-BoldItalic, 0 +Lucida Console, 0, 0, CourierCyrPS, 0 +Lucida Console, 0, 1, CourierCyrPS-Bold, 0 +Lucida Console, 1, 0, CourierCyrPS-Inclined, 0 +Lucida Console, 1, 1, CourierCyrPS-BoldInclined, 0 +Swiss, 
0, 0, Helvetica, 0 +Swiss, 0, 1, Helvetica-Bold, 0 +Swiss, 1, 0, Helvetica-Oblique, 0 +Swiss, 1, 1, Helvetica-BoldOblique, 0 +Univers, 0, 0, Helvetica, 0 +Univers, 0, 1, Helvetica-Bold, 0 +Univers, 1, 0, Helvetica-Oblique, 0 +Univers, 1, 1, Helvetica-BoldOblique, 0 +# All the other fonts +*, 0, 0, TimesNRCyrMT, 0 +*, 0, 1, TimesNRCyrMT-Bold, 0 +*, 1, 0, TimesNRCyrMT-Inclined, 0 +*, 1, 1, TimesNRCyrMT-BoldInclined, 0 diff --git a/sys/lib/antiword/koi8-r.txt b/sys/lib/antiword/koi8-r.txt new file mode 100755 index 000000000..510561005 --- /dev/null +++ b/sys/lib/antiword/koi8-r.txt @@ -0,0 +1,302 @@ +# +# Name: KOI8-R (RFC1489) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 18 August 1999 +# Authors: Helmut Richter <richter@lrz.de> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-R characters map into Unicode. The underlying document is the +# mapping described in RFC 1489. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. 
+# +# Format: Three tab-separated columns +# Column #1 is the KOI8-R code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-R order. +# +# Version history +# 1.0 version: created. +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # 
DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 
0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 
+0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC 
SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 # CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # 
CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/sys/lib/antiword/koi8-u.txt b/sys/lib/antiword/koi8-u.txt new file mode 100755 index 000000000..61a5b4029 --- /dev/null +++ b/sys/lib/antiword/koi8-u.txt @@ -0,0 +1,303 @@ +# +# Name: KOI8-U (RFC2319) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 08 September 2001 +# Authors: Andriy Rysin <arysin@yahoo.com> +# +# Copyright (c) 1991-2001 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-U characters map into Unicode. The underlying document is the +# mapping described in RFC 2319. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. +# +# Format: Three tab-separated columns +# Column #1 is the KOI8-U code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-U order. +# +# Version history +# 1.0 version: created. 
+# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 
0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 
0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x0456 # CYRILLIC SMALL LETTER BELORUSSIAN-UKRAINIAN I +0xA7 0x0457 # CYRILLIC SMALL LETTER YI (UKRAINIAN) +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS 
UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x0491 # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x0406 # CYRILLIC CAPITAL LETTER BELORUSSIAN-UKRAINIAN I +0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x0490 # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 #
CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/sys/lib/antiword/roman.txt 
b/sys/lib/antiword/roman.txt new file mode 100755 index 000000000..d3ed19e6f --- /dev/null +++ b/sys/lib/antiword/roman.txt @@ -0,0 +1 @@ +#=======================================================================
# FTP file name: ROMAN.TXT
#
# Contents: Map (external version) from Mac OS Roman
# character set to Unicode 2.1
#
# Copyright: (c) 1994-1999 by Apple Computer, Inc., all rights
# reserved.
#
# Contact: charsets@apple.com
#
# Changes:
#
# b03 1999-Sep-22 Update contact e-mail address. Matches
# internal utom<b4>, ufrm<b3>, and Text
# Encoding Converter version 1.5.
# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change
# mapping of 0xDB from CURRENCY SIGN to
# EURO SIGN. Matches internal utom<b3>,
# ufrm<b3>.
# n08 1998-Feb-05 Minor update to header comments
# n06 1997-Dec-14 Add warning about future changes to 0xDB
# from CURRENCY SIGN to EURO SIGN. Clarify
# some header information
# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>:
# Change standard mapping for 0xBD from U+2126
# to its canonical decomposition, U+03A9.
# n03 1995-Apr-15 First version (after fixing some typos).
# Matches internal ufrm<n9>.
#
# Standard header:
# ----------------
#
# Apple, the Apple logo, and Macintosh are trademarks of Apple
# Computer, Inc., registered in the United States and other countries.
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
# throughout this document, "Macintosh" can be used to refer to
# Macintosh computers and "Unicode" can be used to refer to the
# Unicode standard.
#
# Apple makes no warranty or representation, either express or
# implied, with respect to these tables, their quality, accuracy, or
# fitness for a particular purpose. In no event will Apple be liable
# for direct, indirect, special, incidental, or consequential damages
# resulting from any defect or inaccuracy in this document or the
# accompanying tables.
#
# These mapping tables and character lists are subject to change.
# The latest tables should be available from the following:
#
# <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
# <ftp://dev.apple.com/devworld/Technical_Documentation/Misc._Standards/>
#
# For general information about Mac OS encodings and these mapping
# tables, see the file "README.TXT".
#
# Format:
# -------
#
# Three tab-separated columns;
# '#' begins a comment which continues to the end of the line.
# Column #1 is the Mac OS Roman code (in hex as 0xNN)
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
# Column #3 is a comment containing the Unicode name
#
# The entries are in Mac OS Roman code order.
#
# One of these mappings requires the use of a corporate character.
# See the file "CORPCHAR.TXT" and notes below.
#
# Control character mappings are not shown in this table, following
# the conventions of the standard UTC mapping tables. However, the
# Mac OS Roman character set uses the standard control characters at
# 0x00-0x1F and 0x7F.
#
# Notes on Mac OS Roman:
# ----------------------
#
# This character set is used for at least the following Mac OS
# localizations: U.S., British, Canadian French, French, Swiss
# French, German, Swiss German, Italian, Swiss Italian, Dutch,
# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan,
# Portuguese, Brazilian, and the default International system.
#
# Variants of Mac OS Roman are used for Croatian, Icelandic,
# Turkish, Romanian, and other encodings. Separate mapping tables
# are available for these encodings.
#
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
# a "currency sign" variant of the Mac OS Roman encoding that still
# maps 0xDB to U+00A4; this can be used for older fonts.
# Note: U+20AC is new with Unicode 2.1; for earlier Unicode
# versions, Mac OS Roman 0xDB may be mapped to private-use character
# U+F8A0.
#
# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago,
# New York, Geneva, and Monaco did not implement the full Mac OS
# Roman character set; they only supported character codes up to
# 0xD8. The TrueType versions of these fonts have always implemented
# the full character set, as with the bitmap and TrueType versions
# of the other standard Roman fonts.
#
# In all Mac OS encodings, fonts such as Chicago which are used
# as "system" fonts (for menus, dialogs, etc.) have four glyphs
# at code points 0x11-0x14 for transient use by the Menu Manager.
# These glyphs are not intended as characters for use in normal
# text, and the associated code points are not generally
# interpreted as associated with these glyphs; they are usually
# interpreted (if at all) as the control codes DC1-DC4.
#
# Unicode mapping issues and notes:
# ---------------------------------
#
# The following corporate zone Unicode character is used in this
# mapping:
#
# 0xF8FF Apple logo
#
# NOTE: The graphic image associated with the Apple logo character
# is not authorized for use without permission of Apple, and
# unauthorized use might constitute trademark infringement.
#
# Details of mapping changes in each version:
# -------------------------------------------
#
# Changes from version n08 to version b02:
#
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
#
# Changes from version n03 to version n04:
#
# - Change mapping of 0xBD from U+2126 to its canonical
# decomposition, U+03A9.
#
##################
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
0x23 0x0023 # NUMBER SIGN
0x24 0x0024 # DOLLAR SIGN
0x25 0x0025 # PERCENT SIGN
0x26 0x0026 # AMPERSAND
0x27 0x0027 # APOSTROPHE
0x28 0x0028 # LEFT PARENTHESIS
0x29 0x0029 # RIGHT PARENTHESIS
0x2A 0x002A # ASTERISK
0x2B 0x002B # PLUS SIGN
0x2C 0x002C # COMMA
0x2D 0x002D # HYPHEN-MINUS
0x2E 0x002E # FULL STOP
0x2F 0x002F # SOLIDUS
0x30 0x0030 # DIGIT ZERO
0x31 0x0031 # DIGIT ONE
0x32 0x0032 # DIGIT TWO
0x33 0x0033 # DIGIT THREE
0x34 0x0034 # DIGIT FOUR
0x35 0x0035 # DIGIT FIVE
0x36 0x0036 # DIGIT SIX
0x37 0x0037 # DIGIT SEVEN
0x38 0x0038 # DIGIT EIGHT
0x39 0x0039 # DIGIT NINE
0x3A 0x003A # COLON
0x3B 0x003B # SEMICOLON
0x3C 0x003C # LESS-THAN SIGN
0x3D 0x003D # EQUALS SIGN
0x3E 0x003E # GREATER-THAN SIGN
0x3F 0x003F # QUESTION MARK
0x40 0x0040 # COMMERCIAL AT
0x41 0x0041 # LATIN CAPITAL LETTER A
0x42 0x0042 # LATIN CAPITAL LETTER B
0x43 0x0043 # LATIN CAPITAL LETTER C
0x44 0x0044 # LATIN CAPITAL LETTER D
0x45 0x0045 # LATIN CAPITAL LETTER E
0x46 0x0046 # LATIN CAPITAL LETTER F
0x47 0x0047 # LATIN CAPITAL LETTER G
0x48 0x0048 # LATIN CAPITAL LETTER H
0x49 0x0049 # LATIN CAPITAL LETTER I
0x4A 0x004A # LATIN CAPITAL LETTER J
0x4B 0x004B # LATIN CAPITAL LETTER K
0x4C 0x004C # LATIN CAPITAL LETTER L
0x4D 0x004D # LATIN CAPITAL LETTER M
0x4E 0x004E # LATIN CAPITAL LETTER N
0x4F 0x004F # LATIN CAPITAL LETTER O
0x50 0x0050 # LATIN CAPITAL LETTER P
0x51 0x0051 # LATIN CAPITAL LETTER Q
0x52 0x0052 # LATIN CAPITAL LETTER R
0x53 0x0053 # LATIN CAPITAL LETTER S
0x54 0x0054 # LATIN CAPITAL LETTER T
0x55 0x0055 # LATIN CAPITAL LETTER U
0x56 0x0056 # LATIN CAPITAL LETTER V
0x57 0x0057 # LATIN CAPITAL LETTER W
0x58 0x0058 # LATIN CAPITAL LETTER X
0x59 0x0059 # LATIN CAPITAL LETTER Y
0x5A 0x005A # LATIN CAPITAL LETTER Z
0x5B 0x005B # LEFT SQUARE BRACKET
0x5C 0x005C # REVERSE SOLIDUS
0x5D 0x005D # RIGHT SQUARE BRACKET
0x5E 0x005E # CIRCUMFLEX ACCENT
0x5F 0x005F # LOW LINE
0x60 0x0060 # GRAVE ACCENT
0x61 0x0061 # LATIN SMALL LETTER A
0x62 0x0062 # LATIN SMALL LETTER B
0x63 0x0063 # LATIN SMALL LETTER C
0x64 0x0064 # LATIN SMALL LETTER D
0x65 0x0065 # LATIN SMALL LETTER E
0x66 0x0066 # LATIN SMALL LETTER F
0x67 0x0067 # LATIN SMALL LETTER G
0x68 0x0068 # LATIN SMALL LETTER H
0x69 0x0069 # LATIN SMALL LETTER I
0x6A 0x006A # LATIN SMALL LETTER J
0x6B 0x006B # LATIN SMALL LETTER K
0x6C 0x006C # LATIN SMALL LETTER L
0x6D 0x006D # LATIN SMALL LETTER M
0x6E 0x006E # LATIN SMALL LETTER N
0x6F 0x006F # LATIN SMALL LETTER O
0x70 0x0070 # LATIN SMALL LETTER P
0x71 0x0071 # LATIN SMALL LETTER Q
0x72 0x0072 # LATIN SMALL LETTER R
0x73 0x0073 # LATIN SMALL LETTER S
0x74 0x0074 # LATIN SMALL LETTER T
0x75 0x0075 # LATIN SMALL LETTER U
0x76 0x0076 # LATIN SMALL LETTER V
0x77 0x0077 # LATIN SMALL LETTER W
0x78 0x0078 # LATIN SMALL LETTER X
0x79 0x0079 # LATIN SMALL LETTER Y
0x7A 0x007A # LATIN SMALL LETTER Z
0x7B 0x007B # LEFT CURLY BRACKET
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
#
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
0xA0 0x2020 # DAGGER
0xA1 0x00B0 # DEGREE SIGN
0xA2 0x00A2 # CENT SIGN
0xA3 0x00A3 # POUND SIGN
0xA4 0x00A7 # SECTION SIGN
0xA5 0x2022 # BULLET
0xA6 0x00B6 # PILCROW SIGN
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
0xA8 0x00AE # REGISTERED SIGN
0xA9 0x00A9 # COPYRIGHT SIGN
0xAA 0x2122 # TRADE MARK SIGN
0xAB 0x00B4 # ACUTE ACCENT
0xAC 0x00A8 # DIAERESIS
0xAD 0x2260 # NOT EQUAL TO
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
0xB0 0x221E # INFINITY
0xB1 0x00B1 # PLUS-MINUS SIGN
0xB2 0x2264 # LESS-THAN OR EQUAL TO
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
0xB4 0x00A5 # YEN SIGN
0xB5 0x00B5 # MICRO SIGN
0xB6 0x2202 # PARTIAL DIFFERENTIAL
0xB7 0x2211 # N-ARY SUMMATION
0xB8 0x220F # N-ARY PRODUCT
0xB9 0x03C0 # GREEK SMALL LETTER PI
0xBA 0x222B # INTEGRAL
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
0xBE 0x00E6 # LATIN SMALL LETTER AE
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
0xC0 0x00BF # INVERTED QUESTION MARK
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
0xC2 0x00AC # NOT SIGN
0xC3 0x221A # SQUARE ROOT
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
0xC5 0x2248 # ALMOST EQUAL TO
0xC6 0x2206 # INCREMENT
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0xC9 0x2026 # HORIZONTAL ELLIPSIS
0xCA 0x00A0 # NO-BREAK SPACE
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
0xCF 0x0153 # LATIN SMALL LIGATURE OE
0xD0 0x2013 # EN DASH
0xD1 0x2014 # EM DASH
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
0xD6 0x00F7 # DIVISION SIGN
0xD7 0x25CA # LOZENGE
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
0xDA 0x2044 # FRACTION SLASH
0xDB 0x20AC # EURO SIGN
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0xDE 0xFB01 # LATIN SMALL LIGATURE FI
0xDF 0xFB02 # LATIN SMALL LIGATURE FL
0xE0 0x2021 # DOUBLE DAGGER
0xE1 0x00B7 # MIDDLE DOT
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
0xE4 0x2030 # PER MILLE SIGN
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0xF0 0xF8FF # Apple logo
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
0xF7 0x02DC # SMALL TILDE
0xF8 0x00AF # MACRON
0xF9 0x02D8 # BREVE
0xFA 0x02D9 # DOT ABOVE
0xFB 0x02DA # RING ABOVE
0xFC 0x00B8 # CEDILLA
0xFD 0x02DD # DOUBLE ACUTE ACCENT
0xFE 0x02DB # OGONEK
0xFF 0x02C7 # CARON
\ No newline at end of file |