diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/tools/buildindex.py | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/tools/buildindex.py')
-rwxr-xr-x | sys/src/cmd/python/Doc/tools/buildindex.py | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/tools/buildindex.py b/sys/src/cmd/python/Doc/tools/buildindex.py new file mode 100755 index 000000000..1bf3748f1 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/buildindex.py @@ -0,0 +1,388 @@ +#! /usr/bin/env python + +__version__ = '$Revision: 36356 $' + +import os.path +import re +import string +import sys + +from xml.sax.saxutils import quoteattr + + +bang_join = "!".join +null_join = "".join + +REPLACEMENTS = [ + # Hackish way to deal with macros replaced with simple text + (re.compile(r"\\ABC\b"), "ABC"), + (re.compile(r"\\ASCII\b"), "ASCII"), + (re.compile(r"\\Cpp\b"), "C++"), + (re.compile(r"\\EOF\b"), "EOF"), + (re.compile(r"\\NULL\b"), "NULL"), + (re.compile(r"\\POSIX\b"), "POSIX"), + (re.compile(r"\\UNIX\b"), "Unix"), + # deal with turds left over from LaTeX2HTML + (re.compile(r"<#\d+#>"), ""), + ] + +class Node: + continuation = 0 + + def __init__(self, link, str, seqno): + self.links = [link] + self.seqno = seqno + for pattern, replacement in REPLACEMENTS: + str = pattern.sub(replacement, str) + # build up the text + self.text = split_entry_text(str) + self.key = split_entry_key(str) + + def __cmp__(self, other): + """Comparison operator includes sequence number, for use with + list.sort().""" + return self.cmp_entry(other) or cmp(self.seqno, other.seqno) + + def cmp_entry(self, other): + """Comparison 'operator' that ignores sequence number.""" + c = 0 + for i in range(min(len(self.key), len(other.key))): + c = (cmp_part(self.key[i], other.key[i]) + or cmp_part(self.text[i], other.text[i])) + if c: + break + return c or cmp(self.key, other.key) or cmp(self.text, other.text) + + def __repr__(self): + return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno) + + def __str__(self): + return bang_join(self.key) + + def dump(self): + return "%s\1%s###%s\n" \ + % ("\1".join(self.links), + bang_join(self.text), + self.seqno) + + +def cmp_part(s1, s2): + result = cmp(s1, s2) + if result == 0: + return 0 + l1 = s1.lower() + l2 = s2.lower() + minlen = min(len(s1), len(s2)) + if len(s1) < len(s2) and l1 == l2[:len(s1)]: + result = -1 + elif len(s2) < len(s1) and l2 == l1[:len(s2)]: + result = 1 + else: + result = cmp(l1, l2) or cmp(s1, s2) + return result + + +def split_entry(str, which): + stuff = [] + parts = str.split('!') + parts = [part.split('@') for part in parts] + for entry in parts: + if len(entry) != 1: + key = entry[which] + else: + key = entry[0] + stuff.append(key) + return stuff + + +_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""", + re.IGNORECASE) +_rmparens = re.compile(r"\(\)") + +def split_entry_key(str): + parts = split_entry(str, 1) + for i in range(len(parts)): + m = _rmtt.match(parts[i]) + if m: + parts[i] = null_join(m.group(1, 2, 3)) + else: + parts[i] = parts[i].lower() + # remove '()' from the key: + parts[i] = _rmparens.sub('', parts[i]) + return map(trim_ignored_letters, parts) + + +def split_entry_text(str): + if '<' in str: + m = _rmtt.match(str) + if m: + str = null_join(m.group(1, 2, 3)) + return split_entry(str, 1) + + +def load(fp): + nodes = [] + rx = re.compile("(.*)\1(.*)###(.*)$") + while 1: + line = fp.readline() + if not line: + break + m = rx.match(line) + if m: + link, str, seqno = m.group(1, 2, 3) + nodes.append(Node(link, str, seqno)) + return nodes + + +def trim_ignored_letters(s): + # ignore $ to keep environment variables with the + # leading letter from the name + if s.startswith("$"): + return s[1:].lower() + else: + return s.lower() + +def get_first_letter(s): + if s.startswith("<tex2html_percent_mark>"): + return "%" + else: + return trim_ignored_letters(s)[0] + + +def split_letters(nodes): + letter_groups = [] + if nodes: + group = [] + append = group.append + letter = get_first_letter(nodes[0].text[0]) + letter_groups.append((letter, group)) + for node in nodes: + nletter = get_first_letter(node.text[0]) + if letter != nletter: + letter = nletter + group = [] + letter_groups.append((letter, group)) + append = group.append + append(node) + return letter_groups + + +def group_symbols(groups): + entries = [] + ident_letters = string.ascii_letters + "_" + while groups[0][0] not in ident_letters: + entries += groups[0][1] + del groups[0] + if entries: + groups.insert(0, ("Symbols", entries)) + + +# need a function to separate the nodes into columns... +def split_columns(nodes, columns=1): + if columns <= 1: + return [nodes] + # This is a rough height; we may have to increase to avoid breaks before + # a subitem. + colheight = int(len(nodes) / columns) + numlong = int(len(nodes) % columns) + if numlong: + colheight = colheight + 1 + else: + numlong = columns + cols = [] + for i in range(numlong): + start = i * colheight + end = start + colheight + cols.append(nodes[start:end]) + del nodes[:end] + colheight = colheight - 1 + try: + numshort = int(len(nodes) / colheight) + except ZeroDivisionError: + cols = cols + (columns - len(cols)) * [[]] + else: + for i in range(numshort): + start = i * colheight + end = start + colheight + cols.append(nodes[start:end]) + # + # If items continue across columns, make sure they are marked + # as continuations so the user knows to look at the previous column. + # + for i in range(len(cols) - 1): + try: + prev = cols[i][-1] + next = cols[i + 1][0] + except IndexError: + return cols + else: + n = min(len(prev.key), len(next.key)) + for j in range(n): + if prev.key[j] != next.key[j]: + break + next.continuation = j + 1 + return cols + + +DL_LEVEL_INDENT = " " + +def format_column(nodes): + strings = ["<dl compact='compact'>"] + append = strings.append + level = 0 + previous = [] + for node in nodes: + current = node.text + count = 0 + for i in range(min(len(current), len(previous))): + if previous[i] != current[i]: + break + count = i + 1 + if count > level: + append("<dl compact='compact'>" * (count - level) + "\n") + level = count + elif level > count: + append("\n") + append(level * DL_LEVEL_INDENT) + append("</dl>" * (level - count)) + level = count + # else: level == count + for i in range(count, len(current) - 1): + term = node.text[i] + level = level + 1 + if node.continuation > i: + extra = " (continued)" + else: + extra = "" + append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>" + % (term, extra, level * DL_LEVEL_INDENT)) + append("\n%s<dt>%s%s</a>" + % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1])) + for link in node.links[1:]: + append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link)) + previous = current + append("\n") + append("</dl>" * (level + 1)) + return null_join(strings) + + +def format_nodes(nodes, columns=1): + strings = [] + append = strings.append + if columns > 1: + colnos = range(columns) + colheight = int(len(nodes) / columns) + if len(nodes) % columns: + colheight = colheight + 1 + colwidth = int(100 / columns) + append('<table width="100%"><tr valign="top">') + for col in split_columns(nodes, columns): + append('<td width="%d%%">\n' % colwidth) + append(format_column(col)) + append("\n</td>") + append("\n</tr></table>") + else: + append(format_column(nodes)) + return null_join(strings) + + +def format_letter(letter): + if letter == '.': + lettername = ". (dot)" + elif letter == '_': + lettername = "_ (underscore)" + else: + lettername = letter.capitalize() + return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \ + % (quoteattr("letter-" + letter), lettername) + + +def format_html_letters(nodes, columns, group_symbol_nodes): + letter_groups = split_letters(nodes) + if group_symbol_nodes: + group_symbols(letter_groups) + items = [] + for letter, nodes in letter_groups: + s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter) + items.append(s) + s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)] + for letter, nodes in letter_groups: + s.append(format_letter(letter)) + s.append(format_nodes(nodes, columns)) + return null_join(s) + +def format_html(nodes, columns): + return format_nodes(nodes, columns) + + +def collapse(nodes): + """Collapse sequences of nodes with matching keys into a single node. + Destructive.""" + if len(nodes) < 2: + return + prev = nodes[0] + i = 1 + while i < len(nodes): + node = nodes[i] + if not node.cmp_entry(prev): + prev.links.append(node.links[0]) + del nodes[i] + else: + i = i + 1 + prev = node + + +def dump(nodes, fp): + for node in nodes: + fp.write(node.dump()) + + +def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0): + nodes.sort() + collapse(nodes) + if letters: + return format_html_letters(nodes, columns, group_symbol_nodes) + else: + return format_html(nodes, columns) + + +def main(): + import getopt + ifn = "-" + ofn = "-" + columns = 1 + letters = 0 + group_symbol_nodes = 1 + opts, args = getopt.getopt(sys.argv[1:], "c:lo:", + ["columns=", "dont-group-symbols", + "group-symbols", "letters", "output="]) + for opt, val in opts: + if opt in ("-o", "--output"): + ofn = val + elif opt in ("-c", "--columns"): + columns = int(val, 10) + elif opt in ("-l", "--letters"): + letters = 1 + elif opt == "--group-symbols": + group_symbol_nodes = 1 + elif opt == "--dont-group-symbols": + group_symbol_nodes = 0 + if not args: + args = [ifn] + nodes = [] + for fn in args: + nodes = nodes + load(open(fn)) + num_nodes = len(nodes) + html = process_nodes(nodes, columns, letters, group_symbol_nodes) + program = os.path.basename(sys.argv[0]) + if ofn == "-": + sys.stdout.write(html) + sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes)) + else: + open(ofn, "w").write(html) + print + print "%s: %d index nodes" % (program, num_nodes) + + +if __name__ == "__main__": + main() |