diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/tools | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/tools')
45 files changed, 10491 insertions, 0 deletions
#! /usr/bin/env python
"""Add reference count annotations to the Python/C API Reference."""
__version__ = '$Revision: 17623 $'

import getopt
import os
import sys

import refcounts


PREFIX_1 = r"\begin{cfuncdesc}{PyObject*}{"
PREFIX_2 = r"\begin{cfuncdesc}{PyVarObject*}{"


def main():
    """Annotate \\begin{cfuncdesc} entries in the LaTeX files named on the
    command line (or stdin) with reference-count behavior, writing the
    result to -o/--output (or stdout).

    Options:
      -o / --output FILE     output file; '-' means stdout (the default)
      -r / --refcounts FILE  refcount database; defaults to
                             <Doc>/api/refcounts.dat next to the refcounts module
    """
    rcfile = os.path.join(os.path.dirname(refcounts.__file__), os.pardir,
                          "api", "refcounts.dat")
    outfile = "-"
    opts, args = getopt.getopt(sys.argv[1:], "o:r:", ["output=", "refcounts="])
    for opt, arg in opts:
        if opt in ("-o", "--output"):
            outfile = arg
        elif opt in ("-r", "--refcounts"):
            rcfile = arg
    rcdict = refcounts.load(rcfile)
    if outfile == "-":
        outfp = sys.stdout
    else:
        outfp = open(outfile, "w")
    if not args:
        args = ["-"]
    for infile in args:
        if infile == "-":
            infp = sys.stdin
        else:
            infp = open(infile)
        # Iterate directly over the file instead of a while-1/readline() loop.
        for line in infp:
            prefix = None
            if line.startswith(PREFIX_1):
                prefix = PREFIX_1
            elif line.startswith(PREFIX_2):
                prefix = PREFIX_2
            if prefix:
                # The function name runs up to the brace closing the {name} group.
                s = line[len(prefix):].split('}', 1)[0]
                try:
                    info = rcdict[s]
                except KeyError:
                    sys.stderr.write("No refcount data for %s\n" % s)
                else:
                    if info.result_type in ("PyObject*", "PyVarObject*"):
                        if info.result_refs is None:
                            # raw string: \N here is literal TeX markup,
                            # not a string escape
                            rc = r"Always \NULL{}"
                        else:
                            # truthy result_refs => the function returns a new
                            # reference, otherwise a borrowed one (replaces the
                            # old "x and 'New' or 'Borrowed'" idiom)
                            rc = ("New" if info.result_refs else "Borrowed") \
                                 + " reference"
                        line = (r"\begin{cfuncdesc}[%s]{%s}{"
                                % (rc, info.result_type)) \
                               + line[len(prefix):]
            # Unmatched (and KeyError) lines pass through unchanged.
            outfp.write(line)
        if infile != "-":
            infp.close()
    if outfile != "-":
        outfp.close()


if __name__ == "__main__":
    main()
#! /usr/bin/env python
"""Merge and format index-entry dumps (produced by LaTeX2HTML processing)
into an HTML index."""

__version__ = '$Revision: 36356 $'

import os.path
import re
import string
import sys

from xml.sax.saxutils import quoteattr


# Bound string methods used as cheap join helpers throughout the module.
bang_join = "!".join
null_join = "".join

REPLACEMENTS = [
    # Hackish way to deal with macros replaced with simple text
    (re.compile(r"\\ABC\b"), "ABC"),
    (re.compile(r"\\ASCII\b"), "ASCII"),
    (re.compile(r"\\Cpp\b"), "C++"),
    (re.compile(r"\\EOF\b"), "EOF"),
    (re.compile(r"\\NULL\b"), "NULL"),
    (re.compile(r"\\POSIX\b"), "POSIX"),
    (re.compile(r"\\UNIX\b"), "Unix"),
    # deal with turds left over from LaTeX2HTML
    (re.compile(r"<#\d+#>"), ""),
    ]

class Node:
    """One index entry: the link(s) pointing at it, its display text and
    sort key (both split on the '!' sub-entry separator), and the sequence
    number used to keep the sort stable."""

    # Set externally (see split_columns) on a node whose entry continues
    # from the previous column; counts how many leading key parts repeat.
    continuation = 0

    def __init__(self, link, str, seqno):
        # NOTE: 'str' shadows the builtin; kept for interface compatibility.
        self.links = [link]
        self.seqno = seqno
        # Replace known macros with their plain-text equivalents first.
        for pattern, replacement in REPLACEMENTS:
            str = pattern.sub(replacement, str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        # Python 2 rich-comparison-free ordering: cmp() and __cmp__ only.
        return self.cmp_entry(other) or cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Compare key part by key part, falling back to the display text
        # of the same part to break ties.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        return c or cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        # Serialized form parsed back by load(): links joined by \1, then
        # \1, the '!'-joined text, '###', and the sequence number.
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  bang_join(self.text),
                  self.seqno)
# --- Entry parsing helpers -------------------------------------------------
# An index entry string uses '!' to separate sub-entries and 'key@display'
# within a part to give a separate sort key and display text.

def split_entry(str, which):
    """Split *str* on '!' and, for each part, return field *which* of its
    '@'-separated fields (or the whole part when there is no '@')."""
    selected = []
    for part in str.split('!'):
        fields = part.split('@')
        if len(fields) == 1:
            selected.append(fields[0])
        else:
            selected.append(fields[which])
    return selected


_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
_rmparens = re.compile(r"\(\)")

def split_entry_key(str):
    """Return the sort-key parts of an entry: <tt> wrappers stripped,
    other parts lowercased, '()' dropped, leading '$' ignored."""
    parts = split_entry(str, 1)
    for i, part in enumerate(parts):
        m = _rmtt.match(part)
        if m:
            part = "".join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        parts[i] = _rmparens.sub('', part)
    return map(trim_ignored_letters, parts)


def split_entry_text(str):
    """Return the display-text parts of an entry, with any <tt> wrapper
    around the whole entry removed."""
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = "".join(m.group(1, 2, 3))
    return split_entry(str, 1)


def load(fp):
    """Parse a dump file (the format written by Node.dump) and return the
    list of Nodes it describes."""
    # \1 (chr(1)) separates the links from the text; '###' precedes seqno.
    rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    for line in iter(fp.readline, ""):
        m = rx.match(line)
        if m:
            nodes.append(Node(*m.group(1, 2, 3)))
    return nodes


def trim_ignored_letters(s):
    """Lowercase *s*, ignoring a leading '$' so environment variables sort
    by their first name letter."""
    if s.startswith("$"):
        return s[1:].lower()
    return s.lower()

def get_first_letter(s):
    """Return the letter used to group *s*; percent-mark entities group
    under '%'."""
    if s.startswith("<tex2html_percent_mark>"):
        return "%"
    return trim_ignored_letters(s)[0]


def split_letters(nodes):
    """Group the (sorted) *nodes* into a list of (letter, nodes) pairs,
    starting a new group whenever the grouping letter changes."""
    letter_groups = []
    current_letter = None
    current_group = None
    for node in nodes:
        letter = get_first_letter(node.text[0])
        if letter != current_letter:
            current_letter = letter
            current_group = []
            letter_groups.append((letter, current_group))
        current_group.append(node)
    return letter_groups
"_" + while groups[0][0] not in ident_letters: + entries += groups[0][1] + del groups[0] + if entries: + groups.insert(0, ("Symbols", entries)) + + +# need a function to separate the nodes into columns... +def split_columns(nodes, columns=1): + if columns <= 1: + return [nodes] + # This is a rough height; we may have to increase to avoid breaks before + # a subitem. + colheight = int(len(nodes) / columns) + numlong = int(len(nodes) % columns) + if numlong: + colheight = colheight + 1 + else: + numlong = columns + cols = [] + for i in range(numlong): + start = i * colheight + end = start + colheight + cols.append(nodes[start:end]) + del nodes[:end] + colheight = colheight - 1 + try: + numshort = int(len(nodes) / colheight) + except ZeroDivisionError: + cols = cols + (columns - len(cols)) * [[]] + else: + for i in range(numshort): + start = i * colheight + end = start + colheight + cols.append(nodes[start:end]) + # + # If items continue across columns, make sure they are marked + # as continuations so the user knows to look at the previous column. 
+ # + for i in range(len(cols) - 1): + try: + prev = cols[i][-1] + next = cols[i + 1][0] + except IndexError: + return cols + else: + n = min(len(prev.key), len(next.key)) + for j in range(n): + if prev.key[j] != next.key[j]: + break + next.continuation = j + 1 + return cols + + +DL_LEVEL_INDENT = " " + +def format_column(nodes): + strings = ["<dl compact='compact'>"] + append = strings.append + level = 0 + previous = [] + for node in nodes: + current = node.text + count = 0 + for i in range(min(len(current), len(previous))): + if previous[i] != current[i]: + break + count = i + 1 + if count > level: + append("<dl compact='compact'>" * (count - level) + "\n") + level = count + elif level > count: + append("\n") + append(level * DL_LEVEL_INDENT) + append("</dl>" * (level - count)) + level = count + # else: level == count + for i in range(count, len(current) - 1): + term = node.text[i] + level = level + 1 + if node.continuation > i: + extra = " (continued)" + else: + extra = "" + append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>" + % (term, extra, level * DL_LEVEL_INDENT)) + append("\n%s<dt>%s%s</a>" + % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1])) + for link in node.links[1:]: + append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link)) + previous = current + append("\n") + append("</dl>" * (level + 1)) + return null_join(strings) + + +def format_nodes(nodes, columns=1): + strings = [] + append = strings.append + if columns > 1: + colnos = range(columns) + colheight = int(len(nodes) / columns) + if len(nodes) % columns: + colheight = colheight + 1 + colwidth = int(100 / columns) + append('<table width="100%"><tr valign="top">') + for col in split_columns(nodes, columns): + append('<td width="%d%%">\n' % colwidth) + append(format_column(col)) + append("\n</td>") + append("\n</tr></table>") + else: + append(format_column(nodes)) + return null_join(strings) + + +def format_letter(letter): + if letter == '.': + lettername = ". 
(dot)" + elif letter == '_': + lettername = "_ (underscore)" + else: + lettername = letter.capitalize() + return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \ + % (quoteattr("letter-" + letter), lettername) + + +def format_html_letters(nodes, columns, group_symbol_nodes): + letter_groups = split_letters(nodes) + if group_symbol_nodes: + group_symbols(letter_groups) + items = [] + for letter, nodes in letter_groups: + s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter) + items.append(s) + s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)] + for letter, nodes in letter_groups: + s.append(format_letter(letter)) + s.append(format_nodes(nodes, columns)) + return null_join(s) + +def format_html(nodes, columns): + return format_nodes(nodes, columns) + + +def collapse(nodes): + """Collapse sequences of nodes with matching keys into a single node. + Destructive.""" + if len(nodes) < 2: + return + prev = nodes[0] + i = 1 + while i < len(nodes): + node = nodes[i] + if not node.cmp_entry(prev): + prev.links.append(node.links[0]) + del nodes[i] + else: + i = i + 1 + prev = node + + +def dump(nodes, fp): + for node in nodes: + fp.write(node.dump()) + + +def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0): + nodes.sort() + collapse(nodes) + if letters: + return format_html_letters(nodes, columns, group_symbol_nodes) + else: + return format_html(nodes, columns) + + +def main(): + import getopt + ifn = "-" + ofn = "-" + columns = 1 + letters = 0 + group_symbol_nodes = 1 + opts, args = getopt.getopt(sys.argv[1:], "c:lo:", + ["columns=", "dont-group-symbols", + "group-symbols", "letters", "output="]) + for opt, val in opts: + if opt in ("-o", "--output"): + ofn = val + elif opt in ("-c", "--columns"): + columns = int(val, 10) + elif opt in ("-l", "--letters"): + letters = 1 + elif opt == "--group-symbols": + group_symbol_nodes = 1 + elif opt == "--dont-group-symbols": + group_symbol_nodes = 0 + if not args: + args = [ifn] + nodes = [] + for fn in args: + nodes 
= nodes + load(open(fn)) + num_nodes = len(nodes) + html = process_nodes(nodes, columns, letters, group_symbol_nodes) + program = os.path.basename(sys.argv[0]) + if ofn == "-": + sys.stdout.write(html) + sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes)) + else: + open(ofn, "w").write(html) + print + print "%s: %d index nodes" % (program, num_nodes) + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/checkargs.pm b/sys/src/cmd/python/Doc/tools/checkargs.pm new file mode 100644 index 000000000..005d3c61f --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/checkargs.pm @@ -0,0 +1,112 @@ +#! /usr/bin/perl + +package checkargs; +require 5.004; # uses "for my $var" +require Exporter; +@ISA = qw(Exporter); +@EXPORT = qw(check_args check_args_range check_args_at_least); +use strict; +use Carp; + +=head1 NAME + +checkargs -- Provide rudimentary argument checking for perl5 functions + +=head1 SYNOPSIS + + check_args(cArgsExpected, @_) + check_args_range(cArgsMin, cArgsMax, @_) + check_args_at_least(cArgsMin, @_) +where "@_" should be supplied literally. + +=head1 DESCRIPTION + +As the first line of user-written subroutine foo, do one of the following: + + my ($arg1, $arg2) = check_args(2, @_); + my ($arg1, @rest) = check_args_range(1, 4, @_); + my ($arg1, @rest) = check_args_at_least(1, @_); + my @args = check_args_at_least(0, @_); + +These functions may also be called for side effect (put a call to one +of the functions near the beginning of the subroutine), but using the +argument checkers to set the argument list is the recommended usage. + +The number of arguments and their definedness are checked; if the wrong +number are received, the program exits with an error message. + +=head1 AUTHOR + +Michael D. Ernst <F<mernst@cs.washington.edu>> + +=cut + +## Need to check that use of caller(1) really gives desired results. +## Need to give input chunk information. +## Is this obviated by Perl 5.003's declarations? 
Not entirely, I think. + +sub check_args ( $@ ) +{ + my ($num_formals, @args) = @_; + my ($pack, $file_arg, $line_arg, $subname, $hasargs, $wantarr) = caller(1); + if (@_ < 1) { croak "check_args needs at least 7 args, got ", scalar(@_), ": @_\n "; } + if ((!wantarray) && ($num_formals != 0)) + { croak "check_args called in scalar context"; } + # Can't use croak below here: it would only go out to caller, not its caller + my $num_actuals = @args; + if ($num_actuals != $num_formals) + { die "$file_arg:$line_arg: function $subname expected $num_formals argument", + (($num_formals == 1) ? "" : "s"), + ", got $num_actuals", + (($num_actuals == 0) ? "" : ": @args"), + "\n"; } + for my $index (0..$#args) + { if (!defined($args[$index])) + { die "$file_arg:$line_arg: function $subname undefined argument ", $index+1, ": @args[0..$index-1]\n"; } } + return @args; +} + +sub check_args_range ( $$@ ) +{ + my ($min_formals, $max_formals, @args) = @_; + my ($pack, $file_arg, $line_arg, $subname, $hasargs, $wantarr) = caller(1); + if (@_ < 2) { croak "check_args_range needs at least 8 args, got ", scalar(@_), ": @_"; } + if ((!wantarray) && ($max_formals != 0) && ($min_formals !=0) ) + { croak "check_args_range called in scalar context"; } + # Can't use croak below here: it would only go out to caller, not its caller + my $num_actuals = @args; + if (($num_actuals < $min_formals) || ($num_actuals > $max_formals)) + { die "$file_arg:$line_arg: function $subname expected $min_formals-$max_formals arguments, got $num_actuals", + ($num_actuals == 0) ? 
"" : ": @args", "\n"; } + for my $index (0..$#args) + { if (!defined($args[$index])) + { die "$file_arg:$line_arg: function $subname undefined argument ", $index+1, ": @args[0..$index-1]\n"; } } + return @args; +} + +sub check_args_at_least ( $@ ) +{ + my ($min_formals, @args) = @_; + my ($pack, $file_arg, $line_arg, $subname, $hasargs, $wantarr) = caller(1); + # Don't do this, because we want every sub to start with a call to check_args* + # if ($min_formals == 0) + # { die "Isn't it pointless to check for at least zero args to $subname?\n"; } + if (scalar(@_) < 1) + { croak "check_args_at_least needs at least 1 arg, got ", scalar(@_), ": @_"; } + if ((!wantarray) && ($min_formals != 0)) + { croak "check_args_at_least called in scalar context"; } + # Can't use croak below here: it would only go out to caller, not its caller + my $num_actuals = @args; + if ($num_actuals < $min_formals) + { die "$file_arg:$line_arg: function $subname expected at least $min_formals argument", + ($min_formals == 1) ? "" : "s", + ", got $num_actuals", + ($num_actuals == 0) ? "" : ": @args", "\n"; } + for my $index (0..$#args) + { if (!defined($args[$index])) + { warn "$file_arg:$line_arg: function $subname undefined argument ", $index+1, ": @args[0..$index-1]\n"; last; } } + return @args; +} + +1; # successful import +__END__ diff --git a/sys/src/cmd/python/Doc/tools/cklatex b/sys/src/cmd/python/Doc/tools/cklatex new file mode 100755 index 000000000..396e914c0 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/cklatex @@ -0,0 +1,26 @@ +#! /bin/sh +# -*- ksh -*- + +# This script *helps* locate lines of normal content that end in '}'; +# this is useful since LaTeX2HTML (at least the old version that we +# use) breaks on many lines that end that way. +# +# Usage: cklatex files... | less +# +# *Read* the output looking for suspicious lines! 
+ +grep -n "[^ ]}\$" $@ | \ + grep -v '\\begin{' | \ + grep -v '\\end{' | \ + grep -v '\\input{' | \ + grep -v '\\documentclass{' | \ + grep -v '\\title{' | \ + grep -v '\\chapter{' | \ + grep -v '\\chapter\*{' | \ + grep -v '\\section{' | \ + grep -v '\\subsection{' | \ + grep -v '\\subsubsection{' | \ + grep -v '\\sectionauthor{' | \ + grep -v '\\moduleauthor{' + +exit $? diff --git a/sys/src/cmd/python/Doc/tools/cmpcsyms b/sys/src/cmd/python/Doc/tools/cmpcsyms new file mode 100755 index 000000000..55f99546e --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/cmpcsyms @@ -0,0 +1,157 @@ +#! /usr/bin/env python +from __future__ import with_statement +import errno +import os +import re +import sys +import string + +if __name__ == "__main__": + _base = sys.argv[0] +else: + _base = __file__ + +_script_home = os.path.abspath(os.path.dirname(_base)) + +srcdir = os.path.dirname(os.path.dirname(_script_home)) + +EXCLUDES = ["bitset.h", "cStringIO.h", "graminit.h", "grammar.h", + "longintrepr.h", "metagrammar.h", + "node.h", "opcode.h", "osdefs.h", "pgenheaders.h", + "py_curses.h", "parsetok.h", "symtable.h", "token.h"] + + +def list_headers(): + """Return a list of headers.""" + incdir = os.path.join(srcdir, "Include") + return [os.path.join(incdir, fn) for fn in os.listdir(incdir) + if fn.endswith(".h") and fn not in EXCLUDES] + + +def matcher(pattern): + return re.compile(pattern).search + +MATCHERS = [ + # XXX this should also deal with ctypedesc, cvardesc and cmemberdesc + matcher(r"\\begin\{cfuncdesc\}\{(?P<result>[^}]*)\}\{(?P<sym>[^}]*)\}{(?P<params>[^}]*)\}"), + matcher(r"\\cfuncline\{(?P<result>[^})]*)\}\{(?P<sym>[^}]*)\}{(?P<params>[^}]*)\}"), + ] + +def list_documented_items(): + """Return a list of everything that's already documented.""" + apidir = os.path.join(srcdir, "Doc", "api") + files = [fn for fn in os.listdir(apidir) if fn.endswith(".tex")] + L = [] + for fn in files: + fullname = os.path.join(apidir, fn) + data = open(fullname).read() + for matcher in 
def normalize_type(t):
    """Reduce a C type string to a canonical, space-free form.

    Truncates after a pointer star, drops a trailing parameter name and a
    leading 'register', so e.g. 'const char *name' and 'const char*' both
    normalize to 'constchar*'.  Returns '' (after a diagnostic) when
    nothing parseable remains.
    """
    t = t.strip()
    s = t.rfind("*")
    if s != -1:
        # strip everything after the pointer name
        t = t[:s+1]
    # Drop the variable name
    s = t.split()
    typenames = 1
    if len(s)>1 and s[0]=='unsigned' and s[1]=='int':
        typenames = 2
    # string.ascii_letters instead of the locale-dependent (and Python-2-only)
    # string.letters; parameter names starting with '_' are still not dropped,
    # matching the original behavior.
    if len(s) > typenames and s[-1][0] in string.ascii_letters:
        del s[-1]
    if not s:
        # Diagnostic for types we could not parse; write() produces the same
        # bytes as the old `print "XXX", t` statement.
        sys.stdout.write("XXX %s\n" % (t,))
        return ""
    # Drop register
    if s[0] == "register":
        del s[0]
    # discard all spaces
    return ''.join(s)

def compare_type(t1, t2):
    """Return True when two C type strings denote the same type after
    normalization.  The doc placeholder '\\moreargs' never matches '...'."""
    t1 = normalize_type(t1)
    t2 = normalize_type(t2)
    if t1 == r'\moreargs' and t2 == '...':
        return False
    if t1 != t2:
        return False
    return True


def compare_types(ret, params, hret, hparams):
    """Compare a documented signature (ret, params) against a header
    signature (hret, hparams); *params*/*hparams* are comma-separated C
    parameter lists.  Return True when they agree."""
    if not compare_type(ret, hret):
        return False
    params = params.split(",")
    hparams = hparams.split(",")
    # BUGFIX: ''.split(",") yields [''], never [], so the "no parameters"
    # vs "(void)" equivalences below were dead code.  Map the empty case
    # to an empty list so they can actually fire.
    if params == ['']:
        params = []
    if hparams == ['']:
        hparams = []
    if not params and hparams == ['void']:
        return True
    if not hparams and params == ['void']:
        return True
    if len(params) != len(hparams):
        return False
    for p1, p2 in zip(params, hparams):
        if not compare_type(p1, p2):
            return False
    return True
continue + expr = "\^(.*)%s" % sym + m = re.search(expr, signature) + if not m: + print "Could not split",signature, "using",expr + rettype = m.group(1).strip() + m = re.search("signature:\(([^)]*)\)", signature) + if not m: + print "Could not get signature from", signature + params = m.group(1) + L[sym] = (rettype, params) + + for sym, ret, params, fn in documented: + if sym not in L: + print "No declaration for '%s'" % sym + continue + hret, hparams = L[sym] + if not compare_types(ret, params, hret, hparams): + print "Declaration error for %s (%s):" % (sym, fn) + print ret+": "+params + print hret+": "+hparams + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/custlib.py b/sys/src/cmd/python/Doc/tools/custlib.py new file mode 100644 index 000000000..15f07baf0 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/custlib.py @@ -0,0 +1,78 @@ +# Generate custlib.tex, which is a site-specific library document. + +# Phase I: list all the things that can be imported + +import glob +import os.path +import sys + +modules = {} + +for modname in sys.builtin_module_names: + modules[modname] = modname + +for dir in sys.path: + # Look for *.py files + filelist = glob.glob(os.path.join(dir, '*.py')) + for file in filelist: + path, file = os.path.split(file) + base, ext = os.path.splitext(file) + modules[base.lower()] = base + + # Look for shared library files + filelist = (glob.glob(os.path.join(dir, '*.so')) + + glob.glob(os.path.join(dir, '*.sl')) + + glob.glob(os.path.join(dir, '*.o')) ) + for file in filelist: + path, file = os.path.split(file) + base, ext = os.path.splitext(file) + if base[-6:] == 'module': + base = base[:-6] + modules[base.lower()] = base + +# Minor oddity: the types module is documented in libtypes2.tex +if modules.has_key('types'): + del modules['types'] + modules['types2'] = None + +# Phase II: find all documentation files (lib*.tex) +# and eliminate modules that don't have one. 
+ +docs = {} +filelist = glob.glob('lib*.tex') +for file in filelist: + modname = file[3:-4] + docs[modname] = modname + +mlist = modules.keys() +mlist = filter(lambda x, docs=docs: docs.has_key(x), mlist) +mlist.sort() +mlist = map(lambda x, docs=docs: docs[x], mlist) + +modules = mlist + +# Phase III: write custlib.tex + +# Write the boilerplate +# XXX should be fancied up. +print """\documentstyle[twoside,11pt,myformat]{report} +\\title{Python Library Reference} +\\input{boilerplate} +\\makeindex % tell \\index to actually write the .idx file +\\begin{document} +\\pagenumbering{roman} +\\maketitle +\\input{copyright} +\\begin{abstract} +\\noindent This is a customized version of the Python Library Reference. +\\end{abstract} +\\pagebreak +{\\parskip = 0mm \\tableofcontents} +\\pagebreak\\pagenumbering{arabic}""" + +for modname in mlist: + print "\\input{lib%s}" % (modname,) + +# Write the end +print """\\input{custlib.ind} % Index +\\end{document}""" diff --git a/sys/src/cmd/python/Doc/tools/findcsyms b/sys/src/cmd/python/Doc/tools/findcsyms new file mode 100755 index 000000000..ac9b75425 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/findcsyms @@ -0,0 +1,136 @@ +#! 
/usr/bin/env python + +import errno +import os +import re +import sys + +if __name__ == "__main__": + _base = sys.argv[0] +else: + _base = __file__ + +_script_home = os.path.abspath(os.path.dirname(_base)) + +srcdir = os.path.dirname(os.path.dirname(_script_home)) + +EXCLUDES = ["bitset.h", "cStringIO.h", "graminit.h", "grammar.h", + "longintrepr.h", "metagrammar.h", + "node.h", "opcode.h", "osdefs.h", "pgenheaders.h", + "py_curses.h", "parsetok.h", "symtable.h", "token.h"] + + +def list_headers(): + """Return a list of headers.""" + incdir = os.path.join(srcdir, "Include") + return [fn for fn in os.listdir(incdir) + if fn.endswith(".h") and fn not in EXCLUDES] + + +def matcher(pattern): + return re.compile(pattern).match + +MATCHERS = [ + matcher(r"\\begin\{cfuncdesc\}\{[^{]*\}\{(?P<sym>[^{]*)\}"), + matcher(r"\\cfuncline\{[^{]*\}\{(?P<sym>[^{]*)\}"), + matcher(r"\\begin\{ctypedesc\}(\[[^{]*\])?\{(?P<sym>[^{]*)\}"), + matcher(r"\\begin\{cvardesc\}\{[^{]*\}\{(?P<sym>[^{]*)\}"), + matcher(r"\\begin\{cmemberdesc\}\{[^{]*\}\{(?P<sym>[^{]*)\}"), + matcher(r"\\cmemberline\{[^{]*\}\{(?P<sym>[^{]*)\}"), + matcher(r"\\begin\{csimplemacrodesc\}\{(?P<sym>[^{]*)\}"), + ] + + +def list_documented_items(): + """Return a list of everything that's already documented.""" + apidir = os.path.join(srcdir, "Doc", "api") + files = [fn for fn in os.listdir(apidir) if fn.endswith(".tex")] + L = [] + for fn in files: + fullname = os.path.join(apidir, fn) + for line in open(fullname): + line = line.lstrip() + if not line.startswith("\\"): + continue + for matcher in MATCHERS: + m = matcher(line) + if m: + L.append(m.group("sym")) + break + return L + +def split_documented(all, documented): + """Split the list of all symbols into documented and undocumented + categories.""" + doc = [] + undoc = [] + for t in all: + if t[0] in documented: + doc.append(t) + else: + undoc.append(t) + return doc, undoc + +def print_list(L, title=None): + """Dump a list to stdout.""" + if title: + print title + 
":" + print "-" * (len(title) + 1) + w = 0 + for sym, filename in L: + w = max(w, len(sym)) + if w % 4 == 0: + w += 4 + else: + w += (4 - (w % 4)) + for sym, filename in L: + print "%-*s%s" % (w, sym, filename) + + +_spcjoin = ' '.join + +def main(): + args = sys.argv[1:] + if args: + headers = args + documented = [] + else: + os.chdir(os.path.join(srcdir, "Include")) + headers = list_headers() + documented = list_documented_items() + + cmd = ("ctags -f - --file-scope=no --c-types=dgpstux " + "-Istaticforward -Istatichere=static " + + _spcjoin(headers)) + fp = os.popen(cmd) + L = [] + prevsym = None + while 1: + line = fp.readline() + if not line: + break + sym, filename = line.split()[:2] + if sym == prevsym: + continue + if not sym.endswith("_H"): + L.append((sym, filename)) + prevsym = sym + L.sort() + fp.close() + + try: + if documented: + documented, undocumented = split_documented(L, documented) + print_list(documented, "Documented symbols") + if undocumented: + print + print_list(undocumented, "Undocumented symbols") + else: + print_list(L) + except IOError, e: + if e.errno != errno.EPIPE: + raise + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/findmodrefs b/sys/src/cmd/python/Doc/tools/findmodrefs new file mode 100755 index 000000000..8c5f93fb6 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/findmodrefs @@ -0,0 +1,63 @@ +#! 
# -*- Python -*-
# Report \module{...} references in LaTeX sources that name a module other
# than the one the file declares with \declaremodule.

import fileinput
import getopt
import glob
import os
import re
import sys


# Matches \declaremodule[key]{type}{name}, capturing the module name.
declare_rx = re.compile(
    r"\\declaremodule(?:\[[a-zA-Z0-9]*\]*)?{[a-zA-Z_0-9]+}{([a-zA-Z_0-9]+)}")

# Matches \module{name}, capturing the referenced module name.
module_rx = re.compile(r"\\module{([a-zA-Z_0-9]+)}")

def main():
    """Scan the .tex files given on the command line (default: *.tex) and
    report every \\module{X} whose X differs from the file's declared
    module.  -l/--list prints each offending filename once; -n/--number
    prefixes each report with the line number."""
    try:
        just_list = 0
        print_lineno = 0
        opts, args = getopt.getopt(sys.argv[1:], "ln", ["list", "number"])
        for opt, arg in opts:
            if opt in ("-l", "--list"):
                just_list = 1
            elif opt in ("-n", "--number"):
                print_lineno = 1
        files = args
        if not files:
            files = glob.glob("*.tex")
            files.sort()
        modulename = None
        for line in fileinput.input(files):
            # A new \section resets the current module context.
            if line[:9] == r"\section{":
                modulename = None
                continue
            # NOTE(review): 'modulesynopsys' looks like a typo for
            # '\modulesynopsis{' -- as written this skip may never fire;
            # confirm against the LaTeX sources before relying on it.
            if line[:16] == r"\modulesynopsys{":
                continue
            m = declare_rx.match(line)
            if m:
                modulename = m.group(1)
                continue
            # Ignore everything before the first \declaremodule.
            if not modulename:
                continue
            m = module_rx.search(line)
            if m:
                name = m.group(1)
                if name != modulename:
                    filename = fileinput.filename()
                    if just_list:
                        print filename
                        # One hit per file is enough in list mode.
                        fileinput.nextfile()
                        modulename = None
                    elif print_lineno:
                        print "%s(%d):%s" \
                              % (filename, fileinput.filelineno(), line[:-1])
                    else:
                        print "%s:%s" % (filename, line[:-1])
    except KeyboardInterrupt:
        sys.exit(1)


if __name__ == "__main__":
    main()
+ +Certain exceptions are made to the list of undocumented symbols: + + * don't mention symbols in which all letters are upper case on the + assumption they are manifest constants + + * don't mention symbols that are themselves modules + + * don't mention symbols that match those exported by os, math, string, + types, or __builtin__ modules + +Finally, if a name is exported by the module but fails a getattr() lookup, +that anomaly is reported. +""" + +import __builtin__ +import getopt +import glob +import math +import os +import re +import string +import sys +import types +import warnings + +def usage(): + print >> sys.stderr, """ +usage: %s dir +where 'dir' is the Library Reference Manual source directory. +""" % os.path.basename(sys.argv[0]) + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], "") + except getopt.error: + usage() + return + + if not args: + usage() + return + + libdir = args[0] + + warnings.filterwarnings("error") + + pat = re.compile(r"\\declaremodule\s*{[^}]*}\s*{([^}]*)}") + + missing = [] + filelist = glob.glob(os.path.join(libdir, "lib*.tex")) + filelist.sort() + for f in filelist: + mod = f[3:-4] + if not mod: continue + data = open(f).read() + mods = re.findall(pat, data) + if not mods: + print "No module declarations found in", f + continue + for modname in mods: + # skip special modules + if modname.startswith("__"): + continue + try: + mod = __import__(modname) + except ImportError: + missing.append(modname) + continue + except DeprecationWarning: + print "Deprecated module:", modname + continue + if hasattr(mod, "__all__"): + all = mod.__all__ + else: + all = [k for k in dir(mod) if k[0] != "_"] + mentioned = 0 + all.sort() + for name in all: + if data.find(name) == -1: + # certain names are predominantly used for testing + if name in ("main","test","_test"): + continue + # is it some sort of manifest constant? 
+ if name.upper() == name: + continue + try: + item = getattr(mod, name) + except AttributeError: + print " ", name, "exposed, but not an attribute" + continue + # don't care about modules that might be exposed + if type(item) == types.ModuleType: + continue + # check a few modules which tend to be import *'d + isglobal = 0 + for m in (os, math, string, __builtin__, types): + if hasattr(m, name) and item == getattr(m, name): + isglobal = 1 + break + if isglobal: continue + if not mentioned: + print "Not mentioned in", modname, "docs:" + mentioned = 1 + print " ", name + if missing: + missing.sort() + print "Could not import:" + print " ", ", ".join(missing) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + pass diff --git a/sys/src/cmd/python/Doc/tools/fix_hack b/sys/src/cmd/python/Doc/tools/fix_hack new file mode 100755 index 000000000..8dad11101 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/fix_hack @@ -0,0 +1,2 @@ +#!/bin/sh +sed -e 's/{\\ptt[ ]*\\char[ ]*'"'"'137}/_/g' <"$1" > "@$1" && mv "@$1" $1 diff --git a/sys/src/cmd/python/Doc/tools/fix_libaux.sed b/sys/src/cmd/python/Doc/tools/fix_libaux.sed new file mode 100755 index 000000000..fb33cc575 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/fix_libaux.sed @@ -0,0 +1,3 @@ +#! 
/bin/sed -f +s/{\\tt \\hackscore {}\\hackscore {}/\\sectcode{__/ +s/\\hackscore {}\\hackscore {}/__/ diff --git a/sys/src/cmd/python/Doc/tools/fixinfo.el b/sys/src/cmd/python/Doc/tools/fixinfo.el new file mode 100644 index 000000000..267a7e3c9 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/fixinfo.el @@ -0,0 +1,15 @@ +(defun fix-python-texinfo () + (goto-char (point-min)) + (replace-regexp "\\(@setfilename \\)\\([-a-z]*\\)$" + "\\1python-\\2.info") + (replace-string "@node Front Matter\n@chapter Abstract\n" + "@node Abstract\n@section Abstract\n") + (mark-whole-buffer) + (texinfo-master-menu 'update-all-nodes) + (save-buffer) + ) ;; fix-python-texinfo + +;; now really do it: +(find-file (car command-line-args-left)) +(fix-python-texinfo) +(kill-emacs) diff --git a/sys/src/cmd/python/Doc/tools/getpagecounts b/sys/src/cmd/python/Doc/tools/getpagecounts new file mode 100755 index 000000000..53404e747 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/getpagecounts @@ -0,0 +1,97 @@ +#! /usr/bin/env python + +"""Generate a page count report of the PostScript version of the manuals.""" + +__version__ = '$Revision: 39598 $' + +import getopt +import sys + + +class PageCounter: + def __init__(self): + self.doclist = [] + self.total = 0 + self.title_width = 0 + self.version = "" + + def add_document(self, prefix, title): + count = count_pages(prefix + ".ps") + self.doclist.append((title, prefix, count)) + self.title_width = max(self.title_width, len(title)) + self.total = self.total + count + + def dump(self): + fmt = "%%-%ds (%%s.ps, %%d pages)" % self.title_width + for item in self.doclist: + print fmt % item + print + print " Total page count: %d" % self.total + + def parse_options(self): + opts, args = getopt.getopt(sys.argv[1:], "r:", ["release="]) + assert not args + for opt, arg in opts: + if opt in ("-r", "--release"): + self.version = arg + + def run(self): + self.parse_options() + if self.version: + version = self.version[:3] + self.add_document("whatsnew" + 
version.replace(".", ""), + "What's New in Python " + version) + for prefix, title in [ + ("api", "Python/C API"), + ("ext", "Extending and Embedding the Python Interpreter"), + ("lib", "Python Library Reference"), + ("mac", "Macintosh Module Reference"), + ("ref", "Python Reference Manual"), + ("tut", "Python Tutorial"), + ("doc", "Documenting Python"), + ("inst", "Installing Python Modules"), + ("dist", "Distributing Python Modules"), + ]: + self.add_document(prefix, title) + print self.PREFIX + self.dump() + print self.SUFFIX + + PREFIX = """\ +This is the PostScript version of the standard Python documentation. +If you plan to print this, be aware that some of the documents are +long. It is formatted for printing on two-sided paper; if you do plan +to print this, *please* print two-sided if you have a printer capable +of it! To locate published copies of the larger manuals, or other +Python reference material, consult the Python Bookstore at: + + http://wiki.python.org/moin/PythonBooks + +The following manuals are included in this package: +""" + SUFFIX = """\ + + +If you have any questions, comments, or suggestions regarding these +documents, please send them via email to docs@python.org. +""" + +def count_pages(filename): + fp = open(filename) + count = 0 + while 1: + lines = fp.readlines(1024*40) + if not lines: + break + for line in lines: + if line[:7] == "%%Page:": + count = count + 1 + fp.close() + return count + + +def main(): + PageCounter().run() + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/getversioninfo b/sys/src/cmd/python/Doc/tools/getversioninfo new file mode 100755 index 000000000..d22c16ddc --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/getversioninfo @@ -0,0 +1,71 @@ +#! 
/usr/bin/env python + +import os +import re +import sys + +try: + __file__ +except NameError: + __file__ = sys.argv[0] + +tools = os.path.dirname(os.path.abspath(__file__)) +Doc = os.path.dirname(tools) +src = os.path.dirname(Doc) +patchlevel_h = os.path.join(src, "Include", "patchlevel.h") + +# This won't pick out all #defines, but it will pick up the ones we +# care about. +rx = re.compile(r"\s*#define\s+([a-zA-Z][a-zA-Z_0-9]*)\s+([a-zA-Z_0-9]+)") + +d = {} +f = open(patchlevel_h) +for line in f: + m = rx.match(line) + if m is not None: + name, value = m.group(1, 2) + d[name] = value +f.close() + +release = "%s.%s" % (d["PY_MAJOR_VERSION"], d["PY_MINOR_VERSION"]) +micro = int(d["PY_MICRO_VERSION"]) +shortversion = release +if micro != 0: + release += "." + str(micro) +level = d["PY_RELEASE_LEVEL"] + +suffixes = { + "PY_RELEASE_LEVEL_ALPHA": "a", + "PY_RELEASE_LEVEL_BETA": "b", + "PY_RELEASE_LEVEL_GAMMA": "c", + } + +releaseinfo = "" +if level != "PY_RELEASE_LEVEL_FINAL": + releaseinfo = suffixes[level] + str(int(d["PY_RELEASE_SERIAL"])) + +def write_file(name, text): + """Write text to a file if the file doesn't exist or if text + differs from any existing content.""" + if os.path.exists(name): + f = open(name, "r") + s = f.read() + f.close() + if s == text: + return + f = open(name, "w") + f.write(text) + f.close() + +patchlevel_tex = os.path.join(Doc, "commontex", "patchlevel.tex") + +write_file(patchlevel_tex, + "%% This file is generated by ../tools/getversioninfo;\n" + "%% do not edit manually.\n" + "\n" + "\\release{%s}\n" + "\\setreleaseinfo{%s}\n" + "\\setshortversion{%s}\n" + % (release, releaseinfo, shortversion)) + +print release + releaseinfo diff --git a/sys/src/cmd/python/Doc/tools/html2texi.pl b/sys/src/cmd/python/Doc/tools/html2texi.pl new file mode 100755 index 000000000..5dcfd46f0 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/html2texi.pl @@ -0,0 +1,1750 @@ +#! 
/usr/bin/env perl +# html2texi.pl -- Convert HTML documentation to Texinfo format +# Michael Ernst <mernst@cs.washington.edu> +# Time-stamp: <1999-01-12 21:34:27 mernst> + +# This program converts HTML documentation trees into Texinfo format. +# Given the name of a main (or contents) HTML file, it processes that file, +# and other files (transitively) referenced by it, into a Texinfo file +# (whose name is chosen from the file or directory name of the argument). +# For instance: +# html2texi.pl api/index.html +# produces file "api.texi". + +# Texinfo format can be easily converted to Info format (for browsing in +# Emacs or the standalone Info browser), to a printed manual, or to HTML. +# Thus, html2texi.pl permits conversion of HTML files to Info format, and +# secondarily enables producing printed versions of Web page hierarchies. + +# Unlike HTML, Info format is searchable. Since Info is integrated into +# Emacs, one can read documentation without starting a separate Web +# browser. Additionally, Info browsers (including Emacs) contain +# convenient features missing from Web browsers, such as easy index lookup +# and mouse-free browsing. + +# Limitations: +# html2texi.pl is currently tuned to latex2html output (and it corrects +# several latex2html bugs), but should be extensible to arbitrary HTML +# documents. It will be most useful for HTML with a hierarchical structure +# and an index, and it recognizes those features as created by latex2html +# (and possibly by some other tools). The HTML tree to be traversed must +# be on local disk, rather than being accessed via HTTP. +# This script requires the use of "checkargs.pm". To eliminate that +# dependence, replace calls to check_args* by @_ (which is always the last +# argument to those functions). +# Also see the "to do" section, below. +# Comments, suggestions, bug fixes, and enhancements are welcome. 
+ +# Troubleshooting: +# Malformed HTML can cause this program to abort, so +# you should check your HTML files to make sure they are legal. + + +### +### Typical usage for the Python documentation: +### + +# (Actually, most of this is in a Makefile instead.) +# The resulting Info format Python documentation is currently available at +# ftp://ftp.cs.washington.edu/homes/mernst/python-info.tar.gz + +# Fix up HTML problems, eg <DT><DL COMPACT><DD> should be <DT><DL COMPACT><DD>. + +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/api/index.html +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/ext/index.html +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/lib/index.html +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/mac/index.html +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/ref/index.html +# html2texi.pl /homes/fish/mernst/tmp/python-doc/html/tut/index.html + +# Edit the generated .texi files: +# * change @setfilename to prefix "python-" +# * fix up any sectioning, such as for Abstract +# * make Texinfo menus +# * perhaps remove the @detailmenu ... @end detailmenu +# In Emacs, to do all this: +# (progn (goto-char (point-min)) (replace-regexp "\\(@setfilename \\)\\([-a-z]*\\)$" "\\1python-\\2.info") (replace-string "@node Front Matter\n@chapter Abstract\n" "@node Abstract\n@section Abstract\n") (progn (mark-whole-buffer) (texinfo-master-menu 'update-all-nodes)) (save-buffer)) + +# makeinfo api.texi +# makeinfo ext.texi +# makeinfo lib.texi +# makeinfo mac.texi +# makeinfo ref.texi +# makeinfo tut.texi + + +### +### Structure of the code +### + +# To be written... + + +### +### Design decisions +### + +# Source and destination languages +# -------------------------------- +# +# The goal is Info files; I create Texinfo, so I don't have to worry about +# the finer details of Info file creation. (I'm not even sure of its exact +# format.) +# +# Why not start from LaTeX rather than HTML? 
+# I could hack latex2html itself to produce Texinfo instead, or fix up +# partparse.py (which already translates LaTeX to Teinfo). +# Pros: +# * has high-level information such as index entries, original formatting +# Cons: +# * those programs are complicated to read and understand +# * those programs try to handle arbitrary LaTeX input, track catcodes, +# and more: I don't want to go to that effort. HTML isn't as powerful +# as LaTeX, so there are fewer subtleties. +# * the result wouldn't work for arbitrary HTML documents; it would be +# nice to eventually extend this program to HTML produced from Docbook, +# Frame, and more. + +# Parsing +# ------- +# +# I don't want to view the text as a linear stream; I'd rather parse the +# whole thing and then do pattern matching over the parsed representation (to +# find idioms such as indices, lists of child nodes, etc.). +# * Perl provides HTML::TreeBuilder, which does just what I want. +# * libwww-perl: http://www.linpro.no/lwp/ +# * TreeBuilder: HTML-Tree-0.51.tar.gz +# * Python Parsers, Formatters, and Writers don't really provide the right +# interface (and the version in Grail doesn't correspond to another +# distributed version, so I'm confused about which to be using). I could +# write something in Python that creates a parse tree, but why bother? + +# Other implementation language issues: +# * Python lacks variable declarations, reasonable scoping, and static +# checking tools. I've written some of the latter for myself that make +# my Perl programming a lot safer than my Python programming will be until +# I have a similar suite for that language. + + +########################################################################### +### To do +### + +# Section names: +# Fix the problem with multiple sections in a single file (eg, Abstract in +# Front Matter section). 
+# Deal with cross-references, as in /homes/fish/mernst/tmp/python-doc/html/ref/types.html:310 +# Index: +# Perhaps double-check that every tag mentioned in the index is found +# in the text. +# Python: email to docs@python.org, to get their feedback. +# Compare to existing lib/ Info manual +# Write the hooks into info-look; replace pyliblookup1-1.tar.gz. +# Postpass to remove extra quotation marks around typography already in +# a different font (to avoid double delimiters as in "`code'"); or +# perhaps consider using only font-based markup so that we don't get +# the extra *bold* and `code' markup in Info. + +## Perhaps don't rely on automatic means for adding up, next, prev; I have +## all that info available to me already, so it's not so much trouble to +## add it. (Right?) But it is *so* easy to use Emacs instead... + + +########################################################################### +### Strictures +### + +# man HTML::TreeBuilder +# man HTML::Parser +# man HTML::Element + +# require HTML::ParserWComment; +require HTML::Parser; +require HTML::TreeBuilder; +require HTML::Element; + +use File::Basename; + +use strict; +# use Carp; + +use checkargs; + + +########################################################################### +### Variables +### + +my @section_stack = (); # elements are chapter/section/subsec nodetitles (I think) +my $current_ref_tdf; # for the file currently being processed; + # used in error messages +my $html_directory; +my %footnotes; + +# First element should not be used. 
+my @sectionmarker = ("manual", "chapter", "section", "subsection", "subsubsection"); + +my %inline_markup = ("b" => "strong", + "code" => "code", + "i" => "emph", + "kbd" => "kbd", + "samp" => "samp", + "strong" => "strong", + "tt" => "code", + "var" => "var"); + +my @deferred_index_entries = (); + +my @index_titles = (); # list of (filename, type) lists +my %index_info = ("Index" => ["\@blindex", "bl"], + "Concept Index" => ["\@cindex", "cp"], + "Module Index" => ["\@mdindex", "md"]); + + +########################################################################### +### Main/contents page +### + +# Process first-level page on its own, or just a contents page? Well, I do +# want the title, author, etc., and the front matter... For now, just add +# that by hand at the end. + + +# data structure possibilities: +# * tree-like (need some kind of stack when processing (or parent pointers)) +# * list of name and depth; remember old and new depths. + +# Each element is a reference to a list of (nodetitle, depth, filename). +my @contents_list = (); + +# The problem with doing fixups on the fly is that some sections may have +# already been processed (and no longer available) by the time we notice +# others with the same name. It's probably better to fully construct the +# contents list (reading in all files of interest) upfront; that will also +# let me do a better job with cross-references, because again, all files +# will already be read in. +my %contents_hash = (); +my %contents_fixups = (); + +my @current_contents_list = (); + +# Merge @current_contents_list into @contents_list, +# and set @current_contents_list to be empty. +sub merge_contents_lists ( ) +{ check_args(0, @_); + + # Three possibilities: + # * @contents_list is empty: replace it by @current_contents_list. 
+ # * prefixes of the two lists are identical: do nothing + # * @current_contents_list is all at lower level than $contents_list[0]; + # prefix @contents_list by @current_contents_list + + if (scalar(@current_contents_list) == 0) + { die "empty current_contents_list"; } + + # if (scalar(@contents_list) == 0) + # { @contents_list = @current_contents_list; + # @current_contents_list = (); + # return; } + + # if (($ {$contents_list[0]}[1]) < ($ {$current_contents_list[0]}[1])) + # { unshift @contents_list, @current_contents_list; + # @current_contents_list = (); + # return; } + + for (my $i=0; $i<scalar(@current_contents_list); $i++) + { my $ref_c_tdf = $current_contents_list[$i]; + if ($i >= scalar(@contents_list)) + { push @contents_list, $ref_c_tdf; + my $title = $ {$ref_c_tdf}[0]; + if (defined $contents_hash{$title}) + { $contents_fixups{$title} = 1; } + else + { $contents_hash{$title} = 1; } + next; } + my $ref_tdf = $contents_list[$i]; + my ($title, $depth, $file) = @{$ref_tdf}; + my ($c_title, $c_depth, $c_file) = @{$ref_c_tdf}; + + if (($title ne $c_title) + && ($depth < $c_depth) + && ($file ne $c_file)) + { splice @contents_list, $i, 0, $ref_c_tdf; + if (defined $contents_hash{$c_title}) + { $contents_fixups{$c_title} = 1; } + else + { $contents_hash{$c_title} = 1; } + next; } + + if (($title ne $c_title) + || ($depth != $c_depth) + || ($file ne $c_file)) + { die ("while processing $ {$current_ref_tdf}[2] at depth $ {$current_ref_tdf}[1], mismatch at index $i:", + "\n main: <<<$title>>> $depth $file", + "\n curr: <<<$c_title>>> $c_depth $c_file"); } + } + @current_contents_list = (); +} + + + +# Set @current_contents_list to a list of (title, href, sectionlevel); +# then merge that list into @contents_list. +# Maybe this function should also produce a map +# from title (or href) to sectionlevel (eg "chapter"?). 
+sub process_child_links ( $ ) +{ my ($he) = check_args(1, @_); + + # $he->dump(); + if (scalar(@current_contents_list) != 0) + { die "current_contents_list nonempty: @current_contents_list"; } + $he->traverse(\&increment_current_contents_list, 'ignore text'); + + # Normalize the depths; for instance, convert 1,3,5 into 0,1,2. + my %depths = (); + for my $ref_tdf (@current_contents_list) + { $depths{$ {$ref_tdf}[1]} = 1; } + my @sorted_depths = sort keys %depths; + my $current_depth = scalar(@section_stack)-1; + my $current_depth_2 = $ {$current_ref_tdf}[1]; + if ($current_depth != $current_depth_2) + { die "mismatch in current depths: $current_depth $current_depth_2; ", join(", ", @section_stack); } + for (my $i=0; $i<scalar(@sorted_depths); $i++) + { $depths{$sorted_depths[$i]} = $i + $current_depth+1; } + for my $ref_tdf (@current_contents_list) + { $ {$ref_tdf}[1] = $depths{$ {$ref_tdf}[1]}; } + + # Eliminate uninteresting sections. Hard-coded hack for now. + if ($ {$current_contents_list[-1]}[0] eq "About this document ...") + { pop @current_contents_list; } + if ((scalar(@current_contents_list) > 1) + && ($ {$current_contents_list[1]}[0] eq "Contents")) + { my $ref_first_tdf = shift @current_contents_list; + $current_contents_list[0] = $ref_first_tdf; } + + for (my $i=0; $i<scalar(@current_contents_list); $i++) + { my $ref_tdf = $current_contents_list[$i]; + my $title = $ {$ref_tdf}[0]; + if (exists $index_info{$title}) + { my $index_file = $ {$ref_tdf}[2]; + my ($indexing_command, $suffix) = @{$index_info{$title}}; + process_index_file($index_file, $indexing_command); + print TEXI "\n\@defindex $suffix\n"; + push @index_titles, $title; + splice @current_contents_list, $i, 1; + $i--; } + elsif ($title =~ /\bIndex$/) + { print STDERR "Warning: \"$title\" might be an index; if so, edit \%index_info.\n"; } } + + merge_contents_lists(); + + # print_contents_list(); + # print_index_info(); +} + + +sub increment_current_contents_list ( $$$ ) +{ my ($he, $startflag, 
$depth) = check_args(3, @_); + if (!$startflag) + { return; } + + if ($he->tag eq "li") + { my @li_content = @{$he->content}; + if ($li_content[0]->tag ne "a") + { die "first element of <LI> should be <A>"; } + my ($name, $href, @content) = anchor_info($li_content[0]); + # unused $name + my $title = join("", collect_texts($li_content[0])); + $title = texi_remove_punctuation($title); + # The problem with these is that they are formatted differently in + # @menu and @node! + $title =~ s/``/\"/g; + $title =~ s/''/\"/g; + $title =~ s/ -- / /g; + push @current_contents_list, [ $title, $depth, $href ]; } + return 1; +} + +# Simple version for section titles +sub html_to_texi ( $ ) +{ my ($he) = check_args(1, @_); + if (!ref $he) + { return $he; } + + my $tag = $he->tag; + if (exists $inline_markup{$tag}) + { my $result = "\@$inline_markup{$tag}\{"; + for my $elt (@{$he->content}) + { $result .= html_to_texi($elt); } + $result .= "\}"; + return $result; } + else + { $he->dump(); + die "html_to_texi confused by <$tag>"; } +} + + + +sub print_contents_list () +{ check_args(0, @_); + print STDERR "Contents list:\n"; + for my $ref_tdf (@contents_list) + { my ($title, $depth, $file) = @{$ref_tdf}; + print STDERR "$title $depth $file\n"; } +} + + + +########################################################################### +### Index +### + +my $l2h_broken_link_name = "l2h-"; + + +# map from file to (map from anchor name to (list of index texts)) +# (The list is needed when a single LaTeX command like \envvar +# expands to multiple \index commands.) 
+my %file_index_entries = (); +my %this_index_entries; # map from anchor name to (list of index texts) + +my %file_index_entries_broken = (); # map from file to (list of index texts) +my @this_index_entries_broken; + +my $index_prefix = ""; +my @index_prefixes = (); + +my $this_indexing_command; + +sub print_index_info () +{ check_args(0, @_); + my ($key, $val); + for my $file (sort keys %file_index_entries) + { my %index_entries = %{$file_index_entries{$file}}; + print STDERR "file: $file\n"; + for my $aname (sort keys %index_entries) + { my @entries = @{$index_entries{$aname}}; + if (scalar(@entries) == 1) + { print STDERR " $aname : $entries[0]\n"; } + else + { print STDERR " $aname : ", join("\n " . (" " x length($aname)), @entries), "\n"; } } } + for my $file (sort keys %file_index_entries_broken) + { my @entries = @{$file_index_entries_broken{$file}}; + print STDERR "file: $file\n"; + for my $entry (@entries) + { print STDERR " $entry\n"; } + } +} + + +sub process_index_file ( $$ ) +{ my ($file, $indexing_command) = check_args(2, @_); + # print "process_index_file $file $indexing_command\n"; + + my $he = file_to_tree($html_directory . 
$file); + # $he->dump(); + + $this_indexing_command = $indexing_command; + $he->traverse(\&process_if_index_dl_compact, 'ignore text'); + undef $this_indexing_command; + # print "process_index_file done\n"; +} + + +sub process_if_index_dl_compact ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + if (($he->tag() eq "dl") && (defined $he->attr('compact'))) + { process_index_dl_compact($he); + return 0; } + else + { return 1; } +} + + +# The elements of a <DL COMPACT> list from a LaTeX2HTML index: +# * a single space: text to be ignored +# * <DT> elements with an optional <DD> element following each one +# Two types of <DT> elements: +# * Followed by a <DD> element: the <DT> contains a single +# string, and the <DD> contains a whitespace string to be ignored, a +# <DL COMPACT> to be recursively processed (with the <DT> string as a +# prefix), and a whitespace string to be ignored. +# * Not followed by a <DD> element: contains a list of anchors +# and texts (ignore the texts, which are only whitespace and commas). +# Optionally contains a <DL COMPACT> to be recursively processed (with +# the <DT> string as a prefix) +sub process_index_dl_compact ( $ ) +{ my ($h) = check_args(1, @_); + my @content = @{$h->content()}; + for (my $i = 0; $i < scalar(@content); $i++) + { my $this_he = $content[$i]; + if ($this_he->tag ne "dt") + { $this_he->dump(); + die "Expected <DT> tag: " . $this_he->tag; } + if (($i < scalar(@content) - 1) && ($content[$i+1]->tag eq "dd")) + { process_index_dt_and_dd($this_he, $content[$i+1]); + $i++; } + else + { process_index_lone_dt($this_he); } } } + + + +# Argument is a <DT> element. If it contains more than one anchor, then +# the texts of all subsequent ones are "[Link]". Example: +# <DT> +# <A HREF="embedding.html#l2h-201"> +# "$PATH" +# ", " +# <A HREF="embedding.html#l2h-205"> +# "[Link]" +# Optionally contains a <DL COMPACT> as well. 
Example: +# <DT> +# <A HREF="types.html#l2h-616"> +# "attribute" +# <DL COMPACT> +# <DT> +# <A HREF="assignment.html#l2h-3074"> +# "assignment" +# ", " +# <A HREF="assignment.html#l2h-3099"> +# "[Link]" +# <DT> +# <A HREF="types.html#l2h-"> +# "assignment, class" + +sub process_index_lone_dt ( $ ) +{ my ($dt) = check_args(1, @_); + my @dtcontent = @{$dt->content()}; + my $acontent; + my $acontent_suffix; + for my $a (@dtcontent) + { if ($a eq ", ") + { next; } + if (!ref $a) + { $dt->dump; + die "Unexpected <DT> string element: $a"; } + + if ($a->tag eq "dl") + { push @index_prefixes, $index_prefix; + if (!defined $acontent_suffix) + { die "acontent_suffix not yet defined"; } + $index_prefix .= $acontent_suffix . ", "; + process_index_dl_compact($a); + $index_prefix = pop(@index_prefixes); + return; } + + if ($a->tag ne "a") + { $dt->dump; + $a->dump; + die "Expected anchor in lone <DT>"; } + + my ($aname, $ahref, @acontent) = anchor_info($a); + # unused $aname + if (scalar(@acontent) != 1) + { die "Expected just one content of <A> in <DT>: @acontent"; } + if (ref $acontent[0]) + { $acontent[0]->dump; + die "Expected string content of <A> in <DT>: $acontent[0]"; } + if (!defined($acontent)) + { $acontent = $index_prefix . $acontent[0]; + $acontent_suffix = $acontent[0]; } + elsif (($acontent[0] ne "[Link]") && ($acontent ne ($index_prefix . 
$acontent[0]))) + { die "Differing content: <<<$acontent>>>, <<<$acontent[0]>>>"; } + + if (!defined $ahref) + { $dt->dump; + die "no HREF in nachor in <DT>"; } + my ($ahref_file, $ahref_name) = split(/\#/, $ahref); + if (!defined $ahref_name) + { # Reference to entire file + $ahref_name = ""; } + + if ($ahref_name eq $l2h_broken_link_name) + { if (!exists $file_index_entries_broken{$ahref_file}) + { $file_index_entries_broken{$ahref_file} = []; } + push @{$file_index_entries_broken{$ahref_file}}, "$this_indexing_command $acontent"; + next; } + + if (!exists $file_index_entries{$ahref_file}) + { $file_index_entries{$ahref_file} = {}; } + # Don't do this! It appears to make a copy, which is not desired. + # my %index_entries = %{$file_index_entries{$ahref_file}}; + if (!exists $ {$file_index_entries{$ahref_file}}{$ahref_name}) + { $ {$file_index_entries{$ahref_file}}{$ahref_name} = []; } + # { my $oldcontent = $ {$file_index_entries{$ahref_file}}{$ahref_name}; + # if ($acontent eq $oldcontent) + # { die "Multiple identical index entries?"; } + # die "Trying to add $acontent, but already have index entry pointing at $ahref_file\#$ahref_name: ${$file_index_entries{$ahref_file}}{$ahref_name}"; } + + push @{$ {$file_index_entries{$ahref_file}}{$ahref_name}}, "$this_indexing_command $acontent"; + # print STDERR "keys: ", keys %{$file_index_entries{$ahref_file}}, "\n"; + } +} + +sub process_index_dt_and_dd ( $$ ) +{ my ($dt, $dd) = check_args(2, @_); + my $dtcontent; + { my @dtcontent = @{$dt->content()}; + if ((scalar(@dtcontent) != 1) || (ref $dtcontent[0])) + { $dd->dump; + $dt->dump; + die "Expected single string (actual size = " . scalar(@dtcontent) . 
") in content of <DT>: @dtcontent"; } + $dtcontent = $dtcontent[0]; + $dtcontent =~ s/ +$//; } + my $ddcontent; + { my @ddcontent = @{$dd->content()}; + if (scalar(@ddcontent) != 1) + { die "Expected single <DD> content, got ", scalar(@ddcontent), " elements:\n", join("\n", @ddcontent), "\n "; } + $ddcontent = $ddcontent[0]; } + if ($ddcontent->tag ne "dl") + { die "Expected <DL> as content of <DD>, but saw: $ddcontent"; } + + push @index_prefixes, $index_prefix; + $index_prefix .= $dtcontent . ", "; + process_index_dl_compact($ddcontent); + $index_prefix = pop(@index_prefixes); +} + + +########################################################################### +### Ordinary sections +### + +sub process_section_file ( $$$ ) +{ my ($file, $depth, $nodetitle) = check_args(3, @_); + my $he = file_to_tree(($file =~ /^\//) ? $file : $html_directory . $file); + + # print STDERR "process_section_file: $file $depth $nodetitle\n"; + + # Equivalently: + # while ($depth >= scalar(@section_stack)) { pop(@section_stack); } + @section_stack = @section_stack[0..$depth-1]; + + # Not a great nodename fixup scheme; need a more global view + if ((defined $contents_fixups{$nodetitle}) + && (scalar(@section_stack) > 0)) + { my $up_title = $section_stack[$#section_stack]; + # hack for Python Standard Library + $up_title =~ s/^(Built-in|Standard) Module //g; + my ($up_first_word) = split(/ /, $up_title); + $nodetitle = "$up_first_word $nodetitle"; + } + + push @section_stack, $nodetitle; + # print STDERR "new section_stack: ", join(", ", @section_stack), "\n"; + + $he->traverse(\&process_if_child_links, 'ignore text'); + %footnotes = (); + # $he->dump; + $he->traverse(\&process_if_footnotes, 'ignore text'); + + # $he->dump; + + if (exists $file_index_entries{$file}) + { %this_index_entries = %{$file_index_entries{$file}}; + # print STDERR "this_index_entries:\n ", join("\n ", keys %this_index_entries), "\n"; + } + else + { # print STDERR "Warning: no index entries for file $file\n"; + 
%this_index_entries = (); } + + if (exists $file_index_entries_broken{$file}) + { @this_index_entries_broken = @{$file_index_entries_broken{$file}}; } + else + { # print STDERR "Warning: no index entries for file $file\n"; + @this_index_entries_broken = (); } + + + if ($he->tag() ne "html") + { die "Expected <HTML> at top level"; } + my @content = @{$he->content()}; + if ((!ref $content[0]) or ($content[0]->tag ne "head")) + { $he->dump; + die "<HEAD> not first element of <HTML>"; } + if ((!ref $content[1]) or ($content[1]->tag ne "body")) + { $he->dump; + die "<BODY> not second element of <HTML>"; } + + $content[1]->traverse(\&output_body); +} + +# stack of things we're inside that are preventing indexing from occurring now. +# These are "h1", "h2", "h3", "h4", "h5", "h6", "dt" (and possibly others?) +my @index_deferrers = (); + +sub push_or_pop_index_deferrers ( $$ ) +{ my ($tag, $startflag) = check_args(2, @_); + if ($startflag) + { push @index_deferrers, $tag; } + else + { my $old_deferrer = pop @index_deferrers; + if ($tag ne $old_deferrer) + { die "Expected $tag at top of index_deferrers but saw $old_deferrer; remainder = ", join(" ", @index_deferrers); } + do_deferred_index_entries(); } +} + + +sub label_add_index_entries ( $;$ ) +{ my ($label, $he) = check_args_range(1, 2, @_); + # print ((exists $this_index_entries{$label}) ? "*" : " "), " label_add_index_entries $label\n"; + # $he is the anchor element + if (exists $this_index_entries{$label}) + { push @deferred_index_entries, @{$this_index_entries{$label}}; + return; } + + if ($label eq $l2h_broken_link_name) + { # Try to find some text to use in guessing which links should point here + # I should probably only look at the previous element, or if that is + # all punctuation, the one before it; collecting all the previous texts + # is a bit of overkill. 
+ my @anchor_texts = collect_texts($he); + my @previous_texts = collect_texts($he->parent, $he); + # 4 elements is arbitrary; ought to filter out punctuation and small words + # first, then perhaps keep fewer. Perhaps also filter out formatting so + # that we can see a larger chunk of text? (Probably not.) + # Also perhaps should do further chunking into words, in case the + # index term isn't a chunk of its own (eg, was in <tt>...</tt>. + my @candidate_texts = (@anchor_texts, (reverse(@previous_texts))[0..min(3,$#previous_texts)]); + + my $guessed = 0; + for my $text (@candidate_texts) + { # my $orig_text = $text; + if ($text =~ /^[\"\`\'().?! ]*$/) + { next; } + if (length($text) <= 2) + { next; } + # hack for Python manual; maybe defer until failure first time around? + $text =~ s/^sys\.//g; + for my $iterm (@this_index_entries_broken) + { # I could test for zero: LaTeX2HTML's failures in the Python + # documentation are only for items of the form "... (built-in...)" + if (index($iterm, $text) != -1) + { push @deferred_index_entries, $iterm; + # print STDERR "Guessing index term `$iterm' for text `$orig_text'\n"; + $guessed = 1; + } } } + if (!$guessed) + { # print STDERR "No guess in `", join("'; `", @this_index_entries_broken), "' for texts:\n `", join("'\n `", @candidate_texts), "'\n"; + } + } +} + + +# Need to add calls to this at various places. +# Perhaps add HTML::Element argument and do the check for appropriateness +# here (ie, no action if inside <H1>, etc.). 
+sub do_deferred_index_entries () +{ check_args(0, @_); + if ((scalar(@deferred_index_entries) > 0) + && (scalar(@index_deferrers) == 0)) + { print TEXI "\n", join("\n", @deferred_index_entries), "\n"; + @deferred_index_entries = (); } +} + +my $table_columns; # undefined if not in a table +my $table_first_column; # boolean + +sub output_body ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + + if (!ref $he) + { my $space_index = index($he, " "); + if ($space_index != -1) + { # Why does + # print TEXI texi_quote(substr($he, 0, $space_index+1)); + # give: Can't locate object method "TEXI" via package "texi_quote" + # (Because the definition texi_quote hasn't been seen yet.) + print TEXI &texi_quote(substr($he, 0, $space_index+1)); + do_deferred_index_entries(); + print TEXI &texi_quote(substr($he, $space_index+1)); } + else + { print TEXI &texi_quote($he); } + return; } + + my $tag = $he->tag(); + + # Ordinary text markup first + if (exists $inline_markup{$tag}) + { if ($startflag) + { print TEXI "\@$inline_markup{$tag}\{"; } + else + { print TEXI "\}"; } } + elsif ($tag eq "a") + { my ($name, $href, @content) = anchor_info($he); + if (!$href) + { # This anchor is only here for indexing/cross referencing purposes. + if ($startflag) + { label_add_index_entries($name, $he); } + } + elsif ($href =~ "^(ftp|http|news):") + { if ($startflag) + { # Should avoid second argument if it's identical to the URL. + print TEXI "\@uref\{$href, "; } + else + { print TEXI "\}"; } + } + elsif ($href =~ /^\#(foot[0-9]+)$/) + { # Footnote + if ($startflag) + { # Could double-check name and content, but I'm not + # currently storing that information. 
+ print TEXI "\@footnote\{"; + $footnotes{$1}->traverse(\&output_body); + print TEXI "\}"; + return 0; } } + else + { if ($startflag) + { # cross-references are not active Info links, but no text is lost + print STDERR "Can't deal with internal HREF anchors yet:\n"; + $he->dump; } + } + } + elsif ($tag eq "br") + { print TEXI "\@\n"; } + elsif ($tag eq "body") + { } + elsif ($tag eq "center") + { if (has_single_content_string($he) + && ($ {$he->content}[0] =~ /^ *$/)) + { return 0; } + if ($startflag) + { print TEXI "\n\@center\n"; } + else + { print TEXI "\n\@end center\n"; } + } + elsif ($tag eq "div") + { my $align = $he->attr('align'); + if (defined($align) && ($align eq "center")) + { if (has_single_content_string($he) + && ($ {$he->content}[0] =~ /^ *$/)) + { return 0; } + if ($startflag) + { print TEXI "\n\@center\n"; } + else + { print TEXI "\n\@end center\n"; } } + } + elsif ($tag eq "dl") + { # Recognize "<dl><dd><pre> ... </pre></dl>" paradigm for "@example" + if (has_single_content_with_tag($he, "dd")) + { my $he_dd = $ {$he->content}[0]; + if (has_single_content_with_tag($he_dd, "pre")) + { my $he_pre = $ {$he_dd->content}[0]; + print_pre($he_pre); + return 0; } } + if ($startflag) + { # Could examine the elements, to be cleverer about formatting. + # (Also to use ftable, vtable...) + print TEXI "\n\@table \@asis\n"; } + else + { print TEXI "\n\@end table\n"; } + } + elsif ($tag eq "dt") + { push_or_pop_index_deferrers($tag, $startflag); + if ($startflag) + { print TEXI "\n\@item "; } + else + { } } + elsif ($tag eq "dd") + { if ($startflag) + { print TEXI "\n"; } + else + { } + if (scalar(@index_deferrers) != 0) + { $he->dump; + die "Unexpected <$tag> while inside: (" . join(" ", @index_deferrers) . "); bad HTML?"; } + do_deferred_index_entries(); + } + elsif ($tag =~ /^(font|big|small)$/) + { # Do nothing for now. + } + elsif ($tag =~ /^h[1-6]$/) + { # We don't need this because we never recursively enter the heading content. 
+ # push_or_pop_index_deferrers($tag, $startflag); + my $secname = ""; + my @seclabels = (); + for my $elt (@{$he->content}) + { if (!ref $elt) + { $secname .= $elt; } + elsif ($elt->tag eq "br") + { } + elsif ($elt->tag eq "a") + { my ($name, $href, @acontent) = anchor_info($elt); + if ($href) + { $he->dump; + $elt->dump; + die "Nonsimple anchor in <$tag>"; } + if (!defined $name) + { die "No NAME for anchor in $tag"; } + push @seclabels, $name; + for my $subelt (@acontent) + { $secname .= html_to_texi($subelt); } } + else + { $secname .= html_to_texi($elt); } } + if ($secname eq "") + { die "No section name in <$tag>"; } + if (scalar(@section_stack) == 1) + { if ($section_stack[-1] ne "Top") + { die "Not top? $section_stack[-1]"; } + print TEXI "\@settitle $secname\n"; + print TEXI "\@c %**end of header\n"; + print TEXI "\n"; + print TEXI "\@node Top\n"; + print TEXI "\n"; } + else + { print TEXI "\n\@node $section_stack[-1]\n"; + print TEXI "\@$sectionmarker[scalar(@section_stack)-1] ", texi_remove_punctuation($secname), "\n"; } + for my $seclabel (@seclabels) + { label_add_index_entries($seclabel); } + # This should only happen once per file. + label_add_index_entries(""); + if (scalar(@index_deferrers) != 0) + { $he->dump; + die "Unexpected <$tag> while inside: (" . join(" ", @index_deferrers) . "); bad HTML?"; } + do_deferred_index_entries(); + return 0; + } + elsif ($tag eq "hr") + { } + elsif ($tag eq "ignore") + { # Hack for ignored elements + return 0; + } + elsif ($tag eq "li") + { if ($startflag) + { print TEXI "\n\n\@item\n"; + do_deferred_index_entries(); } } + elsif ($tag eq "ol") + { if ($startflag) + { print TEXI "\n\@enumerate \@bullet\n"; } + else + { print TEXI "\n\@end enumerate\n"; } } + elsif ($tag eq "p") + { if ($startflag) + { print TEXI "\n\n"; } + if (scalar(@index_deferrers) != 0) + { $he->dump; + die "Unexpected <$tag> while inside: (" . join(" ", @index_deferrers) . 
"); bad HTML?"; } + do_deferred_index_entries(); } + elsif ($tag eq "pre") + { print_pre($he); + return 0; } + elsif ($tag eq "table") + { # Could also indicate common formatting for first column, or + # determine relative widths for columns (or determine a prototype row) + if ($startflag) + { if (defined $table_columns) + { $he->dump; + die "Can't deal with table nested inside $table_columns-column table"; } + $table_columns = table_columns($he); + if ($table_columns < 2) + { $he->dump; + die "Column with $table_columns columns?"; } + elsif ($table_columns == 2) + { print TEXI "\n\@table \@asis\n"; } + else + { print TEXI "\n\@multitable \@columnfractions"; + for (my $i=0; $i<$table_columns; $i++) + { print TEXI " ", 1.0/$table_columns; } + print TEXI "\n"; } } + else + { if ($table_columns == 2) + { print TEXI "\n\@end table\n"; } + else + { print TEXI "\n\@end multitable\n"; } + undef $table_columns; } } + elsif (($tag eq "td") || ($tag eq "th")) + { if ($startflag) + { if ($table_first_column) + { print TEXI "\n\@item "; + $table_first_column = 0; } + elsif ($table_columns > 2) + { print TEXI "\n\@tab "; } } + else + { print TEXI "\n"; } } + elsif ($tag eq "tr") + { if ($startflag) + { $table_first_column = 1; } } + elsif ($tag eq "ul") + { if ($startflag) + { print TEXI "\n\@itemize \@bullet\n"; } + else + { print TEXI "\n\@end itemize\n"; } } + else + { # I used to have a newline before "output_body" here. 
+ print STDERR "output_body: ignoring <$tag> tag\n"; + $he->dump; + return 0; } + + return 1; +} + +sub print_pre ( $ ) +{ my ($he_pre) = check_args(1, @_); + if (!has_single_content_string($he_pre)) + { die "Multiple or non-string content for <PRE>: ", @{$he_pre->content}; } + my $pre_content = $ {$he_pre->content}[0]; + print TEXI "\n\@example"; + print TEXI &texi_quote($pre_content); + print TEXI "\@end example\n"; +} + +sub table_columns ( $ ) +{ my ($table) = check_args(1, @_); + my $result = 0; + for my $row (@{$table->content}) + { if ($row->tag ne "tr") + { $table->dump; + $row->dump; + die "Expected <TR> as table row."; } + $result = max($result, scalar(@{$row->content})); } + return $result; +} + + +########################################################################### +### Utilities +### + +sub min ( $$ ) +{ my ($x, $y) = check_args(2, @_); + return ($x < $y) ? $x : $y; +} + +sub max ( $$ ) +{ my ($x, $y) = check_args(2, @_); + return ($x > $y) ? $x : $y; +} + +sub file_to_tree ( $ ) +{ my ($file) = check_args(1, @_); + + my $tree = new HTML::TreeBuilder; + $tree->ignore_unknown(1); + # $tree->warn(1); + $tree->parse_file($file); + cleanup_parse_tree($tree); + return $tree +} + + +sub has_single_content ( $ ) +{ my ($he) = check_args(1, @_); + if (!ref $he) + { # return 0; + die "Non-reference argument: $he"; } + my $ref_content = $he->content; + if (!defined $ref_content) + { return 0; } + my @content = @{$ref_content}; + if (scalar(@content) != 1) + { return 0; } + return 1; +} + + +# Return true if the content of the element contains only one element itself, +# and that inner element has the specified tag. 
+sub has_single_content_with_tag ( $$ ) +{ my ($he, $tag) = check_args(2, @_); + if (!has_single_content($he)) + { return 0; } + my $content = $ {$he->content}[0]; + if (!ref $content) + { return 0; } + my $content_tag = $content->tag; + if (!defined $content_tag) + { return 0; } + return $content_tag eq $tag; +} + +sub has_single_content_string ( $ ) +{ my ($he) = check_args(1, @_); + if (!has_single_content($he)) + { return 0; } + my $content = $ {$he->content}[0]; + if (ref $content) + { return 0; } + return 1; +} + + +# Return name, href, content. First two may be undefined; third is an array. +# I don't see how to determine if there are more attributes. +sub anchor_info ( $ ) +{ my ($he) = check_args(1, @_); + if ($he->tag ne "a") + { $he->dump; + die "passed non-anchor to anchor_info"; } + my $name = $he->attr('name'); + my $href = $he->attr('href'); + my @content = (); + { my $ref_content = $he->content; + if (defined $ref_content) + { @content = @{$ref_content}; } } + return ($name, $href, @content); +} + + +sub texi_quote ( $ ) +{ my ($text) = check_args(1, @_); + $text =~ s/([\@\{\}])/\@$1/g; + $text =~ s/ -- / --- /g; + return $text; +} + +# Eliminate bad punctuation (that confuses Makeinfo or Info) for section titles. +sub texi_remove_punctuation ( $ ) +{ my ($text) = check_args(1, @_); + + $text =~ s/^ +//g; + $text =~ s/[ :]+$//g; + $text =~ s/^[1-9][0-9.]* +//g; + $text =~ s/,//g; + # Both embedded colons and " -- " confuse makeinfo. (Perhaps " -- " + # gets converted into " - ", just as "---" would be converted into " -- ", + # so the names end up differing.) + # $text =~ s/:/ -- /g; + $text =~ s/://g; + return $text; +} + + +## Do not use this inside `traverse': it throws off the traversal. Use +## html_replace_by_ignore or html_replace_by_meta instead. +# Returns 1 if success, 0 if failure. 
+sub html_remove ( $;$ ) +{ my ($he, $parent) = check_args_range(1, 2, @_); + if (!defined $parent) + { $parent = $he->parent; } + my $ref_pcontent = $parent->content; + my @pcontent = @{$ref_pcontent}; + for (my $i=0; $i<scalar(@pcontent); $i++) + { if ($pcontent[$i] eq $he) + { splice @{$ref_pcontent}, $i, 1; + $he->parent(undef); + return 1; } } + die "Didn't find $he in $parent"; +} + + +sub html_replace ( $$;$ ) +{ my ($orig, $new, $parent) = check_args_range(2, 3, @_); + if (!defined $parent) + { $parent = $orig->parent; } + my $ref_pcontent = $parent->content; + my @pcontent = @{$ref_pcontent}; + for (my $i=0; $i<scalar(@pcontent); $i++) + { if ($pcontent[$i] eq $orig) + { $ {$ref_pcontent}[$i] = $new; + $new->parent($parent); + $orig->parent(undef); + return 1; } } + die "Didn't find $orig in $parent"; +} + +sub html_replace_by_meta ( $;$ ) +{ my ($orig, $parent) = check_args_range(1, 2, @_); + my $meta = new HTML::Element "meta"; + if (!defined $parent) + { $parent = $orig->parent; } + return html_replace($orig, $meta, $parent); +} + +sub html_replace_by_ignore ( $;$ ) +{ my ($orig, $parent) = check_args_range(1, 2, @_); + my $ignore = new HTML::Element "ignore"; + if (!defined $parent) + { $parent = $orig->parent; } + return html_replace($orig, $ignore, $parent); +} + + + +### +### Collect text elements +### + +my @collected_texts; +my $collect_texts_stoppoint; +my $done_collecting; + +sub collect_texts ( $;$ ) +{ my ($root, $stop) = check_args_range(1, 2, @_); + # print STDERR "collect_texts: $root $stop\n"; + $collect_texts_stoppoint = $stop; + $done_collecting = 0; + @collected_texts = (); + $root->traverse(\&collect_if_text); # process texts + # print STDERR "collect_texts => ", join(";;;", @collected_texts), "\n"; + return @collected_texts; +} + +sub collect_if_text ( $$$ ) +{ my $he = (check_args(3, @_))[0]; # ignore depth and startflag arguments + if ($done_collecting) + { return 0; } + if (!defined $he) + { return 0; } + if (!ref $he) + { push 
@collected_texts, $he; + return 0; } + if ((defined $collect_texts_stoppoint) && ($he eq $collect_texts_stoppoint)) + { $done_collecting = 1; + return 0; } + return 1; +} + + +########################################################################### +### Clean up parse tree +### + +sub cleanup_parse_tree ( $ ) +{ my ($he) = check_args(1, @_); + $he->traverse(\&delete_if_navigation, 'ignore text'); + $he->traverse(\&delete_extra_spaces, 'ignore text'); + $he->traverse(\&merge_dl, 'ignore text'); + $he->traverse(\&reorder_dt_and_dl, 'ignore text'); + return $he; +} + + +## Simpler version that deletes contents but not the element itself. +# sub delete_if_navigation ( $$$ ) +# { my $he = (check_args(3, @_))[0]; # ignore startflag and depth +# if (($he->tag() eq "div") && ($he->attr('class') eq 'navigation')) +# { $he->delete(); +# return 0; } +# else +# { return 1; } +# } + +sub delete_if_navigation ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + if (($he->tag() eq "div") && (defined $he->attr('class')) && ($he->attr('class') eq 'navigation')) + { my $ref_pcontent = $he->parent()->content(); + # Don't try to modify @pcontent, which appears to be a COPY. 
+ # my @pcontent = @{$ref_pcontent}; + for (my $i = 0; $i<scalar(@{$ref_pcontent}); $i++) + { if (${$ref_pcontent}[$i] eq $he) + { splice(@{$ref_pcontent}, $i, 1); + last; } } + $he->delete(); + return 0; } + else + { return 1; } +} + +sub delete_extra_spaces ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + my $tag = $he->tag; + if ($tag =~ /^(head|html|table|tr|ul)$/) + { delete_child_spaces($he); } + delete_trailing_spaces($he); + return 1; +} + + +sub delete_child_spaces ( $ ) +{ my ($he) = check_args(1, @_); + my $ref_content = $he->content(); + for (my $i = 0; $i<scalar(@{$ref_content}); $i++) + { if ($ {$ref_content}[$i] =~ /^ *$/) + { splice(@{$ref_content}, $i, 1); + $i--; } } +} + +sub delete_trailing_spaces ( $ ) +{ my ($he) = check_args(1, @_); + my $ref_content = $he->content(); + if (! defined $ref_content) + { return; } + # Could also check for previous element = /^h[1-6]$/. + for (my $i = 0; $i<scalar(@{$ref_content})-1; $i++) + { if ($ {$ref_content}[$i] =~ /^ *$/) + { my $next_elt = $ {$ref_content}[$i+1]; + if ((ref $next_elt) && ($next_elt->tag =~ /^(br|dd|dl|dt|hr|p|ul)$/)) + { splice(@{$ref_content}, $i, 1); + $i--; } } } + if ($he->tag =~ /^(dd|dt|^h[1-6]|li|p)$/) + { my $last_elt = $ {$ref_content}[$#{$ref_content}]; + if ((defined $last_elt) && ($last_elt =~ /^ *$/)) + { pop @{$ref_content}; } } +} + + +# LaTeX2HTML sometimes creates +# <DT>text +# <DL COMPACT><DD>text +# which should actually be: +# <DL COMPACT> +# <DT>text +# <DD>text +# Since a <DL> gets added, this ends up looking like +# <P> +# <DL> +# <DT> +# text1... +# <DL COMPACT> +# <DD> +# text2... +# dt_or_dd1... +# dt_or_dd2... +# which should become +# <P> +# <DL COMPACT> +# <DT> +# text1... +# <DD> +# text2... +# dt_or_dd1... +# dt_or_dd2... 
+ +sub reorder_dt_and_dl ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + if ($he->tag() eq "p") + { my $ref_pcontent = $he->content(); + if (defined $ref_pcontent) + { my @pcontent = @{$ref_pcontent}; + # print "reorder_dt_and_dl found a <p>\n"; $he->dump(); + if ((scalar(@pcontent) >= 1) + && (ref $pcontent[0]) && ($pcontent[0]->tag() eq "dl") + && $pcontent[0]->implicit()) + { my $ref_dlcontent = $pcontent[0]->content(); + # print "reorder_dt_and_dl found a <p> and implicit <dl>\n"; + if (defined $ref_dlcontent) + { my @dlcontent = @{$ref_dlcontent}; + if ((scalar(@dlcontent) >= 1) + && (ref $dlcontent[0]) && ($dlcontent[0]->tag() eq "dt")) + { my $ref_dtcontent = $dlcontent[0]->content(); + # print "reorder_dt_and_dl found a <p>, implicit <dl>, and <dt>\n"; + if (defined $ref_dtcontent) + { my @dtcontent = @{$ref_dtcontent}; + if ((scalar(@dtcontent) > 0) + && (ref $dtcontent[$#dtcontent]) + && ($dtcontent[$#dtcontent]->tag() eq "dl")) + { my $ref_dl2content = $dtcontent[$#dtcontent]->content(); + # print "reorder_dt_and_dl found a <p>, implicit <dl>, <dt>, and <dl>\n"; + if (defined $ref_dl2content) + { my @dl2content = @{$ref_dl2content}; + if ((scalar(@dl2content) > 0) + && (ref ($dl2content[0])) + && ($dl2content[0]->tag() eq "dd")) + { + # print "reorder_dt_and_dl found a <p>, implicit <dl>, <dt>, <dl>, and <dd>\n"; + # print STDERR "CHANGING\n"; $he->dump(); + html_replace_by_ignore($dtcontent[$#dtcontent]); + splice(@{$ref_dlcontent}, 1, 0, @dl2content); + # print STDERR "CHANGED TO:\n"; $he->dump(); + return 0; # don't traverse children + } } } } } } } } } + return 1; +} + + +# If we find a paragraph that looks like +# <P> +# <HR> +# <UL> +# then accumulate its links into a contents_list and delete the paragraph. 
+sub process_if_child_links ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + if ($he->tag() eq "p") + { my $ref_content = $he->content(); + if (defined $ref_content) + { my @content = @{$ref_content}; + if ((scalar(@content) == 2) + && (ref $content[0]) && $content[0]->tag() eq "hr" + && (ref $content[1]) && $content[1]->tag() eq "ul") + { process_child_links($he); + $he->delete(); + return 0; } } } + return 1; +} + + +# If we find +# <H4> +# "Footnotes" +# <DL> +# <DT> +# <A NAME="foot560"> +# "...borrow" +# <A HREF="refcountsInPython.html#tex2html2" NAME="foot560"> +# "1.2" +# <DD> +# "The metaphor of ``borrowing'' a reference is not completely correct: the owner still has a copy of the reference. " +# ... +# then record the footnote information and delete the section and list. + +my $process_if_footnotes_expect_dl_next = 0; + +sub process_if_footnotes ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + if (($he->tag() eq "h4") + && has_single_content_string($he) + && ($ {$he->content}[0] eq "Footnotes")) + { html_replace_by_ignore($he); + $process_if_footnotes_expect_dl_next = 1; + return 0; } + + if ($process_if_footnotes_expect_dl_next && ($he->tag() eq "dl")) + { my $ref_content = $he->content(); + if (defined $ref_content) + { $process_if_footnotes_expect_dl_next = 0; + my @content = @{$ref_content}; + for (my $i=0; $i<$#content; $i+=2) + { my $he_dt = $content[$i]; + my $he_dd = $content[$i+1]; + if (($he_dt->tag ne "dt") || ($he_dd->tag ne "dd")) + { $he->dump; + die "expected <DT> and <DD> at positions $i and ", $i+1; } + my @dt_content = @{$he_dt->content()}; + if ((scalar(@dt_content) != 2) + || ($dt_content[0]->tag ne "a") + || ($dt_content[1]->tag ne "a")) + { $he_dt->dump; + die "Expected 2 anchors as content of <DT>"; } + my ($dt1_name, $dt1_href, $dt1_content) = anchor_info($dt_content[0]); + my ($dt2_name, 
$dt2_href, $dt2_content) = anchor_info($dt_content[0]); + # unused: $dt1_href, $dt1_content, $dt2_href, $dt2_content + if ($dt1_name ne $dt2_name) + { $he_dt->dump; + die "Expected identical names for anchors"; } + html_replace_by_ignore($he_dd); + $he_dd->tag("div"); # has no effect + $footnotes{$dt1_name} = $he_dd; } + html_replace_by_ignore($he); + return 0; } } + + if ($process_if_footnotes_expect_dl_next) + { $he->dump; + die "Expected <DL> for footnotes next"; } + + return 1; +} + + + +## Merge two adjacent paragraphs containing <DL> items, such as: +# <P> +# <DL> +# <DT> +# ... +# <DD> +# ... +# <P> +# <DL> +# <DT> +# ... +# <DD> +# ... + +sub merge_dl ( $$$ ) +{ my ($he, $startflag) = (check_args(3, @_))[0,1]; # ignore depth argument + if (!$startflag) + { return; } + + my $ref_content = $he->content; + if (!defined $ref_content) + { return; } + my $i = 0; + while ($i < scalar(@{$ref_content})-1) + { my $p1 = $ {$ref_content}[$i]; + if ((ref $p1) && ($p1->tag eq "p") + && has_single_content_with_tag($p1, "dl")) + { my $dl1 = $ {$p1->content}[0]; + # In this loop, rhs, not lhs, of < comparison changes, + # because we are removing elements from the content of $he. + while ($i < scalar(@{$ref_content})-1) + { my $p2 = $ {$ref_content}[$i+1]; + if (!((ref $p2) && ($p2->tag eq "p") + && has_single_content_with_tag($p2, "dl"))) + { last; } + # Merge these two elements. 
+ splice(@{$ref_content}, $i+1, 1); # remove $p2 + my $dl2 = $ {$p2->content}[0]; + $dl1->push_content(@{$dl2->content}); # put $dl2's content in $dl1 + } + # extra increment because next element isn't a candidate for $p1 + $i++; } + $i++; } + return 1; +} + + + +########################################################################### +### Testing +### + +sub test ( $$ ) +{ my ($action, $file) = check_args(2, @_); + + # General testing + if (($action eq "view") || ($action eq "")) + { # # $file = "/homes/gws/mernst/www/links.html"; + # # $file = "/homes/gws/mernst/www/index.html"; + # # $file = "/homes/fish/mernst/java/gud/doc/manual.html"; + # # $file = "/projects/cecil/cecil/doc/manuals/stdlib-man/stdlib/stdlib.html"; + # # $file = "/homes/fish/mernst/tmp/python-doc/html/index.html"; + # $file = "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html"; + my $tree = file_to_tree($file); + + ## Testing + # print STDERR $tree->as_HTML; + $tree->dump(); + + # print STDERR $tree->tag(), "\n"; + # print STDERR @{$tree->content()}, "\n"; + # + # for (@{ $tree->extract_links(qw(a img)) }) { + # my ($link, $linkelem) = @$_; + # print STDERR "$link ", $linkelem->as_HTML; + # } + # + # print STDERR @{$tree->extract_links()}, "\n"; + + # my @top_level_elts = @{$tree->content()}; + + # if scalar(@{$tree->content()}) + return; + } + + elsif ($action eq "raw") + { my $tree = new HTML::TreeBuilder; + $tree->ignore_unknown(1); + # $tree->warn(1); + $tree->parse_file($file); + + $tree->dump(); + + # cleanup_parse_tree($tree); + # $tree->dump(); + return; + } + + # Test dealing with a section. 
+ elsif ($action eq "section") + { # my $file; + # $file = "/homes/fish/mernst/tmp/python-doc/html/api/intro.html"; + # $file = "/homes/fish/mernst/tmp/python-doc/html/api/includes.html"; + # $file = "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html"; + process_section_file($file, 0, "Title"); + } + + # Test dealing with many sections + elsif (0) + { my @files = ("/homes/fish/mernst/tmp/python-doc/html/api/about.html", + "/homes/fish/mernst/tmp/python-doc/html/api/abstract.html", + "/homes/fish/mernst/tmp/python-doc/html/api/api.html", + "/homes/fish/mernst/tmp/python-doc/html/api/cObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/complexObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/concrete.html", + # "/homes/fish/mernst/tmp/python-doc/html/api/contents.html", + "/homes/fish/mernst/tmp/python-doc/html/api/countingRefs.html", + "/homes/fish/mernst/tmp/python-doc/html/api/debugging.html", + "/homes/fish/mernst/tmp/python-doc/html/api/dictObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/embedding.html", + "/homes/fish/mernst/tmp/python-doc/html/api/exceptionHandling.html", + "/homes/fish/mernst/tmp/python-doc/html/api/exceptions.html", + "/homes/fish/mernst/tmp/python-doc/html/api/fileObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/floatObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/front.html", + "/homes/fish/mernst/tmp/python-doc/html/api/fundamental.html", + # "/homes/fish/mernst/tmp/python-doc/html/api/genindex.html", + "/homes/fish/mernst/tmp/python-doc/html/api/importing.html", + "/homes/fish/mernst/tmp/python-doc/html/api/includes.html", + "/homes/fish/mernst/tmp/python-doc/html/api/index.html", + "/homes/fish/mernst/tmp/python-doc/html/api/initialization.html", + "/homes/fish/mernst/tmp/python-doc/html/api/intObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/intro.html", + "/homes/fish/mernst/tmp/python-doc/html/api/listObjects.html", + 
"/homes/fish/mernst/tmp/python-doc/html/api/longObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/mapObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/mapping.html", + "/homes/fish/mernst/tmp/python-doc/html/api/newTypes.html", + "/homes/fish/mernst/tmp/python-doc/html/api/node24.html", + "/homes/fish/mernst/tmp/python-doc/html/api/noneObject.html", + "/homes/fish/mernst/tmp/python-doc/html/api/number.html", + "/homes/fish/mernst/tmp/python-doc/html/api/numericObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/object.html", + "/homes/fish/mernst/tmp/python-doc/html/api/objects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/os.html", + "/homes/fish/mernst/tmp/python-doc/html/api/otherObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/processControl.html", + "/homes/fish/mernst/tmp/python-doc/html/api/refcountDetails.html", + "/homes/fish/mernst/tmp/python-doc/html/api/refcounts.html", + "/homes/fish/mernst/tmp/python-doc/html/api/sequence.html", + "/homes/fish/mernst/tmp/python-doc/html/api/sequenceObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/standardExceptions.html", + "/homes/fish/mernst/tmp/python-doc/html/api/stringObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/threads.html", + "/homes/fish/mernst/tmp/python-doc/html/api/tupleObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/typeObjects.html", + "/homes/fish/mernst/tmp/python-doc/html/api/types.html", + "/homes/fish/mernst/tmp/python-doc/html/api/utilities.html", + "/homes/fish/mernst/tmp/python-doc/html/api/veryhigh.html"); + for my $file (@files) + { print STDERR "\n", "=" x 75, "\n", "$file:\n"; + process_section_file($file, 0, "Title"); + } + } + + # Test dealing with index. 
+ elsif ($action eq "index") + { # my $file; + # $file = "/homes/fish/mernst/tmp/python-doc/html/api/genindex.html"; + + process_index_file($file, "\@cindex"); + print_index_info(); + } + + else + { die "Unrecognized action `$action'"; } +} + + +########################################################################### +### Main loop +### + +sub process_contents_file ( $ ) +{ my ($file) = check_args(1, @_); + + # could also use File::Basename + my $info_file = $file; + $info_file =~ s/(\/?index)?\.html$//; + if ($info_file eq "") + { chomp($info_file = `pwd`); } + $info_file =~ s/^.*\///; # not the most efficient way to remove dirs + + $html_directory = $file; + $html_directory =~ s/(\/|^)[^\/]+$/$1/; + + my $texi_file = "$info_file.texi"; + open(TEXI, ">$texi_file"); + + print TEXI "\\input texinfo \@c -*-texinfo-*-\n"; + print TEXI "\@c %**start of header\n"; + print TEXI "\@setfilename $info_file\n"; + + # 2. Summary Description and Copyright + # The "Summary Description and Copyright" segment describes the + # document and contains the copyright notice and copying permissions + # for the Info file. The segment must be enclosed between `@ifinfo' + # and `@end ifinfo' commands so that the formatters place it only in + # the Info file. + # + # The summary description and copyright segment does not appear in the + # printed document. + # + # @ifinfo + # This is a short example of a complete Texinfo file. + # + # Copyright @copyright{} 1990 Free Software Foundation, Inc. + # @end ifinfo + + + # 3. Title and Copyright + # The "Title and Copyright" segment contains the title and copyright + # pages and copying permissions for the printed manual. The segment + # must be enclosed between `@titlepage' and `@end titlepage' + # commands. The title and copyright page appear only in the printed + # manual. + # + # The titlepage segment does not appear in the Info file. + # + # @titlepage + # @sp 10 + # @comment The title is printed in a large font. 
+ # @center @titlefont{Sample Title} + # + # @c The following two commands start the copyright page. + # @page + # @vskip 0pt plus 1filll + # Copyright @copyright{} 1990 Free Software Foundation, Inc. + # @end titlepage + + + # 4. `Top' Node and Master Menu + # The "Master Menu" contains a complete menu of all the nodes in the + # whole Info file. It appears only in the Info file, in the `Top' + # node. + # + # The `Top' node contains the master menu for the Info file. Since a + # printed manual uses a table of contents rather than a menu, the master + # menu appears only in the Info file. + # + # @node Top, First Chapter, , (dir) + # @comment node-name, next, previous, up + # + # @menu + # * First Chapter:: The first chapter is the + # only chapter in this sample. + # * Concept Index:: This index has two entries. + # @end menu + + + + $current_ref_tdf = [ "Top", 0, $ARGV[0] ]; + process_section_file($file, 0, "Top"); + while (scalar(@contents_list)) + { $current_ref_tdf = shift @contents_list; + process_section_file($ {$current_ref_tdf}[2], $ {$current_ref_tdf}[1], $ {$current_ref_tdf}[0]); + } + + print TEXI "\n"; + for my $indextitle (@index_titles) + { print TEXI "\@node $indextitle\n"; + print TEXI "\@unnumbered $indextitle\n"; + print TEXI "\@printindex $ {$index_info{$indextitle}}[1]\n"; + print TEXI "\n"; } + + print TEXI "\@contents\n"; + print TEXI "\@bye\n"; + close(TEXI); +} + +# This needs to be last so global variable initializations are reached. 
+ +if (scalar(@ARGV) == 0) +{ die "No arguments supplied to html2texi.pl"; } + +if ($ARGV[0] eq "-test") +{ my @test_args = @ARGV[1..$#ARGV]; + if (scalar(@test_args) == 0) + { test("", "index.html"); } + elsif (scalar(@test_args) == 1) + { test("", $test_args[0]); } + elsif (scalar(@test_args) == 2) + { test($test_args[0], $test_args[1]); } + else + { die "Too many test arguments passed to html2texi: ", join(" ", @ARGV); } + exit(); +} + +if (scalar(@ARGV) != 1) +{ die "Pass one argument, the main/contents page"; } + +process_contents_file($ARGV[0]); + +# end of html2texi.pl diff --git a/sys/src/cmd/python/Doc/tools/indfix.py b/sys/src/cmd/python/Doc/tools/indfix.py new file mode 100755 index 000000000..f88c3f37d --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/indfix.py @@ -0,0 +1,100 @@ +#! /usr/bin/env python + +"""Combine similar index entries into an entry and subentries. + +For example: + + \item {foobar} (in module flotz), 23 + \item {foobar} (in module whackit), 4323 + +becomes + + \item {foobar} + \subitem in module flotz, 23 + \subitem in module whackit, 4323 + +Note that an item which matches the format of a collapsable item but which +isn't part of a group of similar items is not modified. 
+""" +__version__ = '$Revision: 29268 $' + +import re +import StringIO +import sys + + +def cmp_entries(e1, e2): + return cmp(e1[1].lower(), e2[1].lower()) or cmp(e1, e2) + + +def dump_entries(write, entries): + if len(entries) == 1: + write(" \\item %s (%s)%s\n" % entries[0]) + return + write(" \item %s\n" % entries[0][0]) + # now sort these in a case insensitive manner: + if len(entries) > 0: + entries.sort(cmp_entries) + for xxx, subitem, pages in entries: + write(" \subitem %s%s\n" % (subitem, pages)) + + +breakable_re = re.compile( + r" \\item (.*) [(](.*)[)]((?:(?:, \d+)|(?:, \\[a-z]*\{\d+\}))+)") + + +def process(ifn, ofn=None): + if ifn == "-": + ifp = sys.stdin + else: + ifp = open(ifn) + if ofn is None: + ofn = ifn + ofp = StringIO.StringIO() + entries = [] + match = breakable_re.match + write = ofp.write + while 1: + line = ifp.readline() + if not line: + break + m = match(line) + if m: + entry = m.group(1, 2, 3) + if entries and entries[-1][0] != entry[0]: + dump_entries(write, entries) + entries = [] + entries.append(entry) + elif entries: + dump_entries(write, entries) + entries = [] + write(line) + else: + write(line) + del write + del match + ifp.close() + data = ofp.getvalue() + ofp.close() + if ofn == "-": + ofp = sys.stdout + else: + ofp = open(ofn, "w") + ofp.write(data) + ofp.close() + + +def main(): + import getopt + outfile = None + opts, args = getopt.getopt(sys.argv[1:], "o:") + for opt, val in opts: + if opt in ("-o", "--output"): + outfile = val + filename = args[0] + outfile = outfile or filename + process(filename, outfile) + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/keywords.py b/sys/src/cmd/python/Doc/tools/keywords.py new file mode 100644 index 000000000..9f32056db --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/keywords.py @@ -0,0 +1,19 @@ +#! 
/usr/bin/env python + +# This Python program sorts and reformats the table of keywords in ref2.tex + +l = [] +try: + while 1: + l = l + raw_input().split() +except EOFError: + pass +l.sort() +for x in l[:]: + while l.count(x) > 1: l.remove(x) +ncols = 5 +nrows = (len(l)+ncols-1)/ncols +for i in range(nrows): + for j in range(i, len(l), nrows): + print l[j].ljust(10), + print diff --git a/sys/src/cmd/python/Doc/tools/listmodules b/sys/src/cmd/python/Doc/tools/listmodules new file mode 100755 index 000000000..506bde33b --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/listmodules @@ -0,0 +1,183 @@ +#! /usr/bin/env python +# -*- Python -*- +# +# This script can be used to identify undocumented modules in the Python +# standard library. Use it like this: +# +# .../Doc/tools/listmodules --ignore-from .../Doc/paper-<paper>/modlib.idx + +"""%(program)s - list modules in the Python standard library + +-a, --annotate Annotate the module names with the subdirectory they + live in +-c, --categorize Group the modules by subdirectory +-i <file>, + +--ignore-from <file> Ignore the modules listed in <file>. <file> may + contain a list of module names or a module index file + as produced when formatting the Python documentation + (.idx or .html flavor). + +If neither -a nor -c are given, the modules are listed in alphabetical +order. + +Note that -a and -c are mutually exclusive. + +Limitation: Modules loadable as shared objects may not be listed, +though this script attempts to locate such modules. 
+ +""" + +__version__ = '$Revision: 18276 $' + +import getopt +import glob +import os +import re +import string +import sys + + +REMOVE_DIRS = ["dos-8x3", "encodings", "distutils", + "lib-old", "lib-stdwin", "test"] + + +def main(): + args = sys.argv[1:] + annotate = 0 + builtin = 0 + categorize = 0 + ignore_dict = {} + ignore = ignore_dict.has_key + try: + opts, args = getopt.getopt( + args, "abchi:", + ["annotate", "built-in", "categorize", "help", "ignore-from="]) + except getopt.error, msg: + sys.stdout = sys.stderr + print msg + print + usage() + sys.exit(2) + for opt, arg in opts: + if opt in ("-a", "--annotate"): + annotate = 1 + elif opt in ("-b", "--built-in"): + builtin = 1 + elif opt in ("-c", "--categorize"): + categorize = 1 + elif opt in ("-h", "--help"): + usage() + sys.exit() + elif opt in ("-i", "--ignore-from"): + data = open(arg).read() + if data[:1] == "\\": + ignore_from_idx(data, ignore_dict) + else: + ignore_from_modulelist(data, ignore_dict) + if args or (annotate and categorize): + usage() + sys.exit(2) + # + # Populate the database: + # + srcdir = os.path.normpath(os.path.join( + os.path.dirname(sys.argv[0]), os.pardir, os.pardir)) + os.chdir(srcdir) + modules_by_name = {} + modules_by_dir = {} + if builtin: + l = [] + modules_by_dir["<builtin>"] = l + for name in sys.builtin_module_names: + if not ignore(name): + modules_by_name[name] = "<built-in>" + l.append(name) + rx = re.compile("Lib/plat-[a-zA-Z0-9]*/") + fp = os.popen("find Lib -name \*.py -print", "r") + while 1: + line = fp.readline() + if not line: + break + m = rx.match(line) + if m: + line = "Lib/plat-*/" + line[m.end():] + line = line[4:-4] # strip off 'Lib/' and '.py\n' + dir, name = os.path.split(line) + dir = dir or "<standard>" + if ignore(name): + continue + if dir not in REMOVE_DIRS: + modules_by_name[name] = dir + l = modules_by_dir.get(dir, []) + modules_by_dir[dir] = l + if name not in l: + l.append(name) + # load up extension modules: + pwd = os.getcwd() + try: + 
os.chdir("Modules") + dir = "<extension>" + for line in glob.glob("*module.c"): + name = line[:-8] + if ignore(name) or modules_by_name.has_key(name) or name == "xx": + continue + modules_by_name[name] = dir + l = modules_by_dir.get(dir, []) + modules_by_dir[dir] = l + if name not in l: + l.append(name) + finally: + os.chdir(pwd) + # + # Dump the results: + # + if annotate: + modules = modules_by_name.items() + modules.sort() + width = max(map(len, modules_by_name.keys())) + format = "%%-%ds %%s" % width + for name, dir in modules: + if dir and dir[0] != "<": + print format % (name, dir) + else: + print name + elif categorize: + modules = modules_by_dir.items() + modules.sort() + width = max(map(len, modules_by_dir.keys())) + format = "%%-%ds %%s" % width + for dir, names in modules: + names.sort() + print format % (dir, names[0]) + for name in names[1:]: + print format % ('', name) + print + else: + modules = modules_by_name.keys() + modules.sort() + print string.join(modules, "\n") + + +def ignore_from_modulelist(data, ignore_dict): + for name in string.split(data): + ignore_dict[name] = name + +def ignore_from_idx(data, ignore_dict): + data = string.replace(data, r"\hackscore {}", "_") + rx = re.compile(r"\\indexentry\s*{([^@]*)@") + for line in string.split(data, "\n"): + m = rx.match(line) + if m: + name = m.group(1) + ignore_dict[name] = name + + +def usage(): + vars = {} + vars["program"] = os.path.basename(sys.argv[0]) + print __doc__ % vars + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/listmodules.py b/sys/src/cmd/python/Doc/tools/listmodules.py new file mode 100644 index 000000000..67099bf8c --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/listmodules.py @@ -0,0 +1,126 @@ +# $Id: listmodules.py 42172 2006-01-24 16:16:19Z fredrik.lundh $ +# +# Locate all standard modules available in this build. +# +# This script is designed to run on Python 1.5.2 and newer. 
#
# Written by Fredrik Lundh, January 2005
#

import imp, sys, os, re, time

# Identifier and timestamp embedded in the generated module list.
identifier = "python-%s-%s" % (sys.version[:3], sys.platform)
timestamp = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime(time.time()))

# known test packages
TEST_PACKAGES = "test.", "bsddb.test.", "distutils.tests."

try:
    import platform
    platform = platform.platform()
except:
    platform = None # unknown

suffixes = imp.get_suffixes()

def get_suffix(file):
    # Return the imp suffix tuple matching this filename, or None.
    for suffix in suffixes:
        if file[-len(suffix[0]):] == suffix[0]:
            return suffix
    return None

def main():
    """Enumerate built-in and on-disk modules and write the sorted list
    (with a commented header) to stdout, to the file named in argv[1],
    or to an auto-named file when argv[1] is '-f'."""

    path = getpath()

    modules = {}
    for m in sys.builtin_module_names:
        modules[m] = None

    for p in path:
        modules.update(getmodules(p))

    keys = modules.keys()
    keys.sort()

    # filter out known test packages
    def cb(m):
        for d in TEST_PACKAGES:
            if m[:len(d)] == d:
                return 0
        return 1
    keys = filter(cb, keys)

    try:
        outfile = sys.argv[1]
        if outfile == "-":
            outfile = None
        elif outfile == "-f":
            outfile = "modules-" + identifier + ".txt"
    except IndexError:
        outfile = None

    if not outfile:
        out = sys.stdout
    else:
        out = open(outfile, "w")

    out.write("# module list (generated by listmodules.py)\n")
    out.write("#\n")
    out.write("# timestamp=%s\n" % repr(timestamp))
    out.write("# sys.version=%s\n" % repr(sys.version))
    out.write("# sys.platform=%s\n" % repr(sys.platform))
    if platform:
        out.write("# platform=%s\n" % repr(platform))
    out.write("#\n")

    for k in keys:
        out.write(k + "\n")

    if out is not sys.stdout:
        out.close()
        print out.name, "ok (%d modules)" % len(modules)

def getmodules(p):
    # get modules in a given directory
    modules = {}
    for f in os.listdir(p):
        f = os.path.join(p, f)
        if os.path.isfile(f):
            m, e = os.path.splitext(f)
            suffix = get_suffix(f)
            if not suffix:
                continue
            m = os.path.basename(m)
            if re.compile("(?i)[a-z_]\w*$").match(m):
                if suffix[2] == imp.C_EXTENSION:
                    # check that this extension can be imported
                    try:
                        __import__(m)
                    except ImportError:
                        continue
                modules[m] = f
        elif os.path.isdir(f):
            # Recurse into packages (directories with __init__.py),
            # prefixing submodule names with the package name.
            m = os.path.basename(f)
            if os.path.isfile(os.path.join(f, "__init__.py")):
                for mm, f in getmodules(f).items():
                    modules[m + "." + mm] = f
    return modules

def getpath():
    # Return sys.path normalized, minus site-packages entries,
    # non-existent directories and the current directory.
    path = map(os.path.normcase, map(os.path.abspath, sys.path[:]))
    # get rid of site packages
    for p in path:
        if p[-13:] == "site-packages":
            def cb(p, site_package_path=os.path.abspath(p)):
                return p[:len(site_package_path)] != site_package_path
            path = filter(cb, path)
            break
    # get rid of non-existent directories and the current directory
    def cb(p, cwd=os.path.normcase(os.getcwd())):
        return os.path.isdir(p) and p != cwd
    path = filter(cb, path)
    return path

if __name__ == "__main__":
    main()
diff --git a/sys/src/cmd/python/Doc/tools/makesec.sh b/sys/src/cmd/python/Doc/tools/makesec.sh new file mode 100755 index 000000000..6159d6fa8 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/makesec.sh @@ -0,0 +1,129 @@
#!/bin/sh

# Simple little checker for individual libref manual sections
#
# usage: makesec.sh section
#

# This script builds the minimal file necessary to run a single section
# through latex, does so, then converts the resulting dvi file to ps or pdf
# and feeds the result into a viewer.  It's by no means foolproof, but seems
# to work okay for me (knock wood).  It sure beats manually commenting out
# most of the man lib.tex file and running everything manually.

# It attempts to locate an appropriate dvi converter and viewer for the
# selected output format.  It understands the following environment
# variables:
#
# PYSRC - refers to the root of your build tree (dir containing Doc)
# DVICVT - refers to a dvi converter like dvips or dvipdf
# VIEWER - refers to an appropriate viewer for the ps/pdf file
#
# Of the three, only PYSRC is currently required.  The other two can be set
# to specify unusual tools which perform those tasks.

# Known issues:
#  - It would be nice if the script could determine PYSRC on its own.
#  - Something about \seealso{}s blows the latex stack, so they need
#    to be commented out for now.

# Validate the environment and command line before doing any work.
if [ x$PYSRC = x ] ; then
    echo "PYSRC must refer to the Python source tree" 1>&2
    exit 1
fi

if [ ! -d $PYSRC/Doc ] ; then
    echo "Can't find a Doc subdirectory in $PYSRC" 1>&2
    exit 1
fi

if [ "$#" -ne 1 ] ; then
    echo "Must specify a single libref manual section on cmd line" 1>&2
    exit 1
fi

# settle on a dvi converter
if [ x$DVICVT != x ] ; then
    converter=$DVICVT
    # derive the output extension from the converter name (dvips -> .ps)
    ext=`echo $DVICVT | sed -e 's/^dvi//'`
    result=lib.$ext
elif [ x`which dvipdf` != x ] ; then
    converter=`which dvipdf`
    ext=.pdf
elif [ x`which dvips` != x ] ; then
    converter=`which dvips`
    ext=.ps
else
    echo "Can't find a reasonable dvi converter" 1>&2
    echo "Set DVICVT to refer to one" 1>&2
    exit 1
fi

# how about a viewer?
if [ x$VIEWER != x ] ; then
    viewer=$VIEWER
elif [ $ext = ".ps" -a x`which gv` != x ] ; then
    viewer=gv
elif [ $ext = ".ps" -a x`which gs` != x ] ; then
    viewer=gs
elif [ $ext = ".pdf" -a x`which acroread` != x ] ; then
    viewer=acroread
elif [ $ext = ".pdf" -a "`uname`" = "Darwin" -a x`which open` != x ] ; then
    viewer=open
elif [ $ext = ".pdf" -a x`which acroread` != x ] ; then
    viewer=acroread
else
    echo "Can't find a reasonable viewer" 1>&2
    echo "Set VIEWER to refer to one" 1>&2
    exit 1
fi

# make sure necessary links are in place
for f in howto.cls pypaper.sty ; do
    rm -f $f
    ln -s $PYSRC/Doc/$f
done

export TEXINPUTS=.:$PYSRC/Doc/texinputs:

# strip extension in case they gave full filename
inp=`basename $1 .tex`

# create the minimal framework necessary to run section through latex
tmpf=lib.tex
cat > $tmpf <<EOF
\documentclass{manual}

% NOTE: this file controls which chapters/sections of the library
% manual are actually printed.  It is easy to customize your manual
% by commenting out sections that you are not interested in.

\title{Python Library Reference}

\input{boilerplate}

\makeindex                      % tell \index to actually write the
                                % .idx file
\makemodindex                   % ... and the module index as well.


\begin{document}

\maketitle

\ifhtml
\chapter*{Front Matter\label{front}}
\fi

\input{$inp}
\end{document}
EOF

latex $tmpf

$converter lib

$viewer lib.pdf

# clean up the scratch files created above
rm -f $tmpf howto.cls pypaper.sty *.idx *.syn
rm -f lib.aux lib.log
diff --git a/sys/src/cmd/python/Doc/tools/mkackshtml b/sys/src/cmd/python/Doc/tools/mkackshtml new file mode 100755 index 000000000..2c79f5eb1 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/mkackshtml @@ -0,0 +1,66 @@
#! /usr/bin/env python
# -*- Python -*-

import string
import support
import sys


def collect(fp):
    # Gather non-blank lines from 'fp'.  A blank line resets the list,
    # so only the names after the final blank line are returned.
    names = []
    while 1:
        line = fp.readline()
        if not line:
            break
        line = string.strip(line)
        if line:
            names.append(line)
        else:
            names = []
    return names


def main():
    """Read contributor names from stdin and emit an HTML
    acknowledgements page, laid out as a table filled column-major."""
    options = support.Options()
    options.columns = 4
    options.variables["title"] = "Acknowledgements"
    options.parse(sys.argv[1:])
    names = collect(sys.stdin)
    # Integer ceiling division: rows needed per column.
    percol = (len(names) + options.columns - 1) / options.columns
    colnums = []
    for i in range(options.columns):
        colnums.append(percol*i)
    options.aesop_type = "information"
    fp = options.get_output_file()
    fp.write(string.rstrip(options.get_header()) + "\n")
    fp.write(THANKS + "\n")
    fp.write('<table width="100%" align="center">\n')
    for i in range(percol):
        fp.write("  <tr>\n")
        for j in colnums:
            try:
                fp.write("    <td>%s</td>\n" % names[i + j])
            except IndexError:
                # last column may be short
                pass
        fp.write("  </tr>\n")
    fp.write("</table>\n")
    fp.write(string.rstrip(options.get_footer()) + "\n")
    fp.close()

THANKS = '''\

<p>These people have contributed in some way to the Python
documentation.  This list is probably not complete -- if you feel that
you or anyone else should be on this list, please let us know (send
email to <a
href="mailto:docs@python.org">docs@python.org</a>), and
we will be glad to correct the problem.</p>

<p>It is only with the input and contributions of the Python community
that Python has such wonderful documentation -- <b>Thank You!</b></p>

'''


if __name__ == "__main__":
    main()
diff --git a/sys/src/cmd/python/Doc/tools/mkhowto b/sys/src/cmd/python/Doc/tools/mkhowto new file mode 100755 index 000000000..21cd6fb27 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/mkhowto @@ -0,0 +1,659 @@
#! /usr/bin/env python
# -*- Python -*-
"""usage: %(program)s [options...] file ...

Options specifying formats to build:
    --html              HyperText Markup Language (default)
    --pdf               Portable Document Format
    --ps                PostScript
    --dvi               'DeVice Indepentent' format from TeX
    --text              ASCII text (requires lynx)

    More than one output format may be specified, or --all.

HTML options:
    --address, -a       Specify an address for page footers.
    --dir               Specify the directory for HTML output.
    --link              Specify the number of levels to include on each page.
    --split, -s         Specify a section level for page splitting, default: %(max_split_depth)s.
    --iconserver, -i    Specify location of icons (default: ./).
    --image-type        Specify the image type to use in HTML output;
                        values: gif, png (default).
    --numeric           Don't rename the HTML files; just keep node#.html for
                        the filenames.
    --style             Specify the CSS file to use for the output (filename,
                        not a URL).
    --up-link           URL to a parent document.
    --up-title          Title of a parent document.
    --favicon           Icon to display in the browsers location bar.

Other options:
    --a4                Format for A4 paper.
    --letter            Format for US letter paper (the default).
    --help, -H          Show this text.
    --logging, -l       Log stdout and stderr to a file (*.how).
    --debugging, -D     Echo commands as they are executed.
    --keep, -k          Keep temporary files around.
    --quiet, -q         Do not print command output to stdout.
                        (stderr is also lost,  sorry; see *.how for errors)
"""

import getopt
import glob
import os
import re
import shutil
import sys


MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files used while formatting the documentation.
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# External tools; overridable here only, not via the command line.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"


def usage(options, file):
    # 'options' supports __getitem__, so the docstring's %(name)s
    # placeholders are filled from Options attributes.
    print >>file, __doc__ % options

def error(options, message, err=2):
    # NOTE(review): the 'err' parameter is accepted but sys.exit(2) is
    # hard-coded below -- confirm whether 'err' was meant to be used.
    print >>sys.stderr, message
    print >>sys.stderr
    usage(options, sys.stderr)
    sys.exit(2)


class Options:
    """Parsed command-line configuration for a documentation build."""
    program = os.path.basename(sys.argv[0])
    #
    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    # NOTE(review): 'have_temps' appears unused; Job.cleanup() assigns the
    # name-mangled 'self.__have_temps' instead -- confirm which was meant.
    have_temps = 0
    icon_server = "."
    image_type = "png"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None
    #
    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's appearantly
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0
    #
    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError, key

    def parse(self, args):
        """Parse command-line 'args' into attributes; returns the
        remaining positional arguments (document files)."""
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe",
                                    "favicon="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self, sys.stdout)
                sys.exit()
            elif opt == "--iconserver":
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                # normalize separators on Windows before expanduser
                if os.sep == "\\":
                    arg = re.sub("/", "\\\\", arg)
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if not format in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified:
        if not self.formats:
            self.formats = self.DEFAULT_FORMATS
        # determine the base set of texinputs directories:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        if not texinputs:
            texinputs = ['']
        mydirs = [os.path.join(TOPDIR, "paper-" + self.paper),
                  os.path.join(TOPDIR, "texinputs"),
                  ]
        # an empty entry in TEXINPUTS means "the default path"; splice
        # our directories in just before it when present.
        if '' in texinputs:
            i = texinputs.index('')
            texinputs[i:i] = mydirs
        else:
            texinputs += mydirs
        self.base_texinputs = texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)


class Job:
    """One document build: runs LaTeX/LaTeX2HTML and friends for a
    single .tex file according to the selected output formats."""
    latex_runs = 0

    def __init__(self, options, path):
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if ("html" in options.formats or "text" in options.formats):
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # back up any existing per-document LaTeX2HTML config file
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            if os.path.exists(l2hconf + "~"):
                os.unlink(l2hconf + "~")
            os.rename(l2hconf, l2hconf + "~")
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        # Run each requested format in dependency order.
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # copy the icon images into the output directory
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % `self.options.image_type`)
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # text output requires a single-page HTML rendering
                fp = open(self.l2h_aux_init_file, "a")
                fp.write("# re-hack this file for --text:\n")
                l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                fp.write("1;\n")
                fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        # Put the document's own directory first on TEXINPUTS.
        texinputs = [self.filedir] + self.options.base_texinputs
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        # First LaTeX pass: create empty indexes so \input of the .ind
        # files succeeds, then record whether bibtex will be needed.
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run enough latex/makeindex/bibtex passes with 'binary' to get
        stable indexes and cross-references."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            # PDF gets bookmarks generated from the table of contents
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        # Remove trailing duplicate entries from module synopsis tables.
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into 'builddir' and post-process the result
        (stylesheet, index.html, about.html, node renaming)."""
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))  # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            # locate the generated "about" node via labels.pl and expose
            # it under the stable name about.html
            label_file = os.path.join(builddir, "labels.pl")
            fp = open(label_file)
            about_node = None
            target = " = q/about/;\n"
            x = len(target)
            while 1:
                line = fp.readline()
                if not line:
                    break
                if line[-x:] == target:
                    line = fp.readline()
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", line)
                    about_node = m.group(1)
                    shutil.copyfile(os.path.join(builddir, about_node),
                                    os.path.join(builddir, "about.html"))
                    break
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def build_text(self, tempdir=None):
        # Dump the single-page HTML rendering to plain text via lynx.
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        # Ensure the first LaTeX pass has been run.
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def write_l2h_aux_init_file(self):
        """Generate the per-document LaTeX2HTML init file, incorporating
        the shared l2hinit.perl plus any user-supplied init files."""
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
        fp.write("package main;\n"
                 "push (@INC, '%s');\n"
                 "$mydir = '%s';\n"
                 % (d, d))
        fp.write(open(L2H_INIT_FILE).read())
        for filename in options.l2h_init_files:
            fp.write("\n# initialization code incorporated from:\n# ")
            fp.write(filename)
            fp.write("\n")
            fp.write(open(filename).read())
        fp.write("\n"
                 "# auxillary init file for latex2html\n"
                 "# generated by mkhowto\n"
                 "$NO_AUTO_LINK = 1;\n"
                 )
        l2hoption(fp, "ABOUT_FILE", options.about_file)
        l2hoption(fp, "ICONSERVER", options.icon_server)
        l2hoption(fp, "IMAGE_TYPE", options.image_type)
        l2hoption(fp, "ADDRESS", options.address)
        l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
        l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
        l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
        l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
        l2hoption(fp, "FAVORITES_ICON", options.favicon)
        l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
        l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
        fp.write("1;\n")
        fp.close()

    def cleanup(self):
        # NOTE(review): '__have_temps' is name-mangled and never read;
        # the class attribute is 'have_temps' -- confirm intent.
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        map(safe_unlink, glob.glob(self.doc + "*.syn"))
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            map(safe_unlink, glob.glob(pattern))
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run a shell command, teeing output to the .how transcript;
        on failure, report the relevant transcript lines and exit."""
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                else:
                    # NOTE(review): os.WTERMSIG is the conventional call
                    # for a killed process; WSTOPSIG reports stop signals
                    # -- confirm which was intended here.
                    self.warning("Killed by signal %s." % os.WSTOPSIG(rc))
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        # Progress messages are prefixed "+++ " so get_run_transcript()
        # can find the start of the most recent command's output.
        msg = "+++ " + msg
        if not self.options.quiet:
            print msg
        self.log(msg + "\n")

    def warning(self, msg):
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        fp = open(self.log_filename, "a")
        fp.write(msg)
        fp.close()


def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call."""
    fp = open(filename)
    lines = fp.readlines()
    fp.close()
    lines.reverse()
    L = []
    for line in lines:
        L.append(line)
        if line[:4] == "+++ ":
            break
    L.reverse()
    return L


def safe_unlink(path):
    """Unlink a file without raising an error if it doesn't exist."""
    try:
        os.unlink(path)
    except os.error:
        pass


def split_pathname(path):
    # Split into (directory, document-name), stripping a .tex extension.
    path = os.path.abspath(path)
    dirname, basename = os.path.split(path)
    if basename[-4:] == ".tex":
        basename = basename[:-4]
    return dirname, basename


_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")
def get_doctype(path):
    # Return the LaTeX document class ("manual", "howto", ...) or None.
    fp = open(path)
    doctype = None
    while 1:
        line = fp.readline()
        if not line:
            break
        m = _doctype_rx.match(line)
        if m:
            doctype = m.group(1)
            break
    fp.close()
    return doctype


def main():
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error, msg:
        error(options, msg)
    if not args:
        # attempt to locate single .tex file in current directory:
        args = glob.glob("*.tex")
        if not args:
            error(options, "No file to process.")
        if len(args) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in args:
        Job(options, path).build()


def l2hoption(fp, option, value):
    # Emit a Perl variable assignment; falsy values are simply omitted.
    if value:
        fp.write('$%s = "%s";\n' % (option, string_to_perl(str(value))))


# Translation table escaping characters that are special inside Perl
# double-quoted strings.
_to_perl = {}
for c in map(chr, range(1, 256)):
    _to_perl[c] = c
_to_perl["@"] = "\\@"
_to_perl["$"] = "\\$"
_to_perl['"'] = '\\"'

def string_to_perl(s):
    return ''.join(map(_to_perl.get, s))


def check_for_bibtex(filename):
    # True if the .aux file references a bibliography database.
    fp = open(filename)
    pos = fp.read().find(r"\bibdata{")
    fp.close()
    return pos >= 0

def uniqify_module_table(filename):
    # Drop a trailing duplicate line from a synopsis table file.
    lines = open(filename).readlines()
    if len(lines) > 1:
        if lines[-1] == lines[-2]:
            del lines[-1]
    open(filename, "w").writelines(lines)


def new_index(filename, label="genindex"):
    # Write an empty index so the first LaTeX pass can \input it.
    fp = open(filename, "w")
    fp.write(r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label)
    fp.close()


if __name__ == "__main__":
    main()
diff --git a/sys/src/cmd/python/Doc/tools/mkinfo b/sys/src/cmd/python/Doc/tools/mkinfo new file mode 100755 index 000000000..be75168fd --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/mkinfo @@ -0,0 +1,65 @@
#!
/bin/sh
# -*- Ksh -*-

# Script to drive the HTML-info conversion process.
# Pass in upto three parameters:
#   - the name of the main tex file
#   - the name of the output file in texi format (optional)
#   - the name of the output file in info format (optional)
#
# Written by Fred L. Drake, Jr. <fdrake@acm.org>

EMACS=${EMACS:-emacs}
MAKEINFO=${MAKEINFO:-makeinfo}


# Normalize file name since something called by html2texi.pl seems to
# screw up with relative path names.
FILENAME="$1"
DOCDIR=`dirname "$FILENAME"`
DOCFILE=`basename "$FILENAME"`
DOCNAME=`basename "$FILENAME" .tex`
# Default the .texi and .info output names from the document name.
if [ $# -gt 1 ]; then
    TEXINAME="$2"
else
    TEXINAME="python-$DOCNAME.texi"
fi
if [ $# -gt 2 ]; then
    INFONAME="$3"
else
    INFONAME="python-$DOCNAME.info"
fi

# Now build the real directory names, and locate our support stuff:
WORKDIR=`pwd`
cd `dirname $0`
TOOLSDIR=`pwd`
cd $DOCDIR
DOCDIR=`pwd`
cd $WORKDIR

COMMONDIR="`dirname $DOCDIR`/commontex"


run() {
    # show what we're doing, like make does:
    echo "$*"
    "$@" || exit $?
}


# generate the Texinfo file:

run $EMACS -batch -q --no-site-file -l $TOOLSDIR/py2texi.el \
    --eval "(setq py2texi-dirs '(\"$DOCDIR\" \"$COMMONDIR\" \"../texinputs\"))" \
    --eval "(setq py2texi-texi-file-name \"$TEXINAME\")" \
    --eval "(setq py2texi-info-file-name \"$INFONAME\")" \
    --eval "(py2texi \"$DOCDIR/$DOCFILE\")" \
    -f kill-emacs
echo Done


# generate the .info files:

run $MAKEINFO --footnote-style end --fill-column 72 \
    --paragraph-indent 0 --output=$INFONAME $TEXINAME
diff --git a/sys/src/cmd/python/Doc/tools/mkmodindex b/sys/src/cmd/python/Doc/tools/mkmodindex new file mode 100755 index 000000000..8e869f926 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/mkmodindex @@ -0,0 +1,158 @@
#! /usr/bin/env python
# -*- Python -*-

"""usage: %(program)s [options] file...
+ +Supported options: + + --address addr + -a addr Set the address text to include at the end of the generated + HTML; this should be used for contact information. + --columns cols + -c cols Set the number of columns each index section should be + displayed in. The default is 1. + --help + -h Display this help message. + --letters + -l Split the output into sections by letter. + --output file + -o file Write output to 'file' instead of standard out. + --iconserver is Use 'is' as the directory containing icons for the + navigation bar. The default is 'icons'. + --title str Set the page title to 'str'. The default is 'Global + Module Index'. + --uplink url Set the upward link URL. The default is './'. + --uptitle str Set the upward link title. The default is 'Python + Documentation Index'. +""" +import os +import re +import sys + +from xml.sax.saxutils import quoteattr + +import buildindex +import support + + +class IndexOptions(support.Options): + aesop_type = "links" + + def __init__(self): + support.Options.__init__(self) + self.add_args("l", ["letters"]) + self.letters = 0 + + def handle_option(self, opt, val): + if opt in ("-l", "--letters"): + self.letters = 1 + + def usage(self): + program = os.path.basename(sys.argv[0]) + print __doc__ % {"program": program} + + links = [ + ('author', 'acks.html', 'Acknowledgements'), + ('help', 'about.html', 'About the Python Documentation'), + ] + + def get_header(self): + header = support.Options.get_header(self) + s = '' + for rel, href, title in self.links: + s += '<link rel="%s" href="%s"' % (rel, href) + if title: + s += ' title=' + quoteattr(title) + s += '>\n ' + return header.replace("<link ", s + "<link ", 1) + + +class Node(buildindex.Node): + def __init__(self, link, str, seqno, platinfo): + self.annotation = platinfo or None + if str[0][-5:] == "</tt>": + str = str[:-5] + self.modname = str + buildindex.Node.__init__(self, link, self.modname, seqno) + if platinfo: + s = '<tt class="module">%s</tt> %s' \ + % 
(self.modname, self.annotation) + else: + s = '<tt class="module">%s</tt>' % str + self.text = [s] + + def __str__(self): + if self.annotation: + return '<tt class="module">%s</tt> %s' \ + % (self.modname, self.annotation) + else: + return '<tt class="module">%s</tt>' % self.modname + +_rx = re.compile( + "<dt><a href=['\"](module-.*\.html)(?:#l2h-\d+)?['\"]>" + "<tt class=['\"]module['\"]>([a-zA-Z_][a-zA-Z0-9_.]*)</tt>\s*(<em>" + "\(<span class=['\"]platform['\"]>.*</span>\)</em>)?</a>") + +def main(): + options = IndexOptions() + options.variables["title"] = "Global Module Index" + options.parse(sys.argv[1:]) + args = options.args + if not args: + args = ["-"] + # + # Collect the input data: + # + nodes = [] + has_plat_flag = 0 + for ifn in args: + if ifn == "-": + ifp = sys.stdin + dirname = '' + else: + ifp = open(ifn) + dirname = os.path.dirname(ifn) + while 1: + line = ifp.readline() + if not line: + break + m = _rx.match(line) + if m: + # This line specifies a module! + basename, modname, platinfo = m.group(1, 2, 3) + has_plat_flag = has_plat_flag or platinfo + linkfile = os.path.join(dirname, basename) + nodes.append(Node('<a href="%s">' % linkfile, modname, + len(nodes), platinfo)) + ifp.close() + # + # Generate all output: + # + num_nodes = len(nodes) + # Here's the HTML generation: + parts = [options.get_header(), + buildindex.process_nodes(nodes, options.columns, options.letters), + options.get_footer(), + ] + if has_plat_flag: + parts.insert(1, PLAT_DISCUSS) + html = ''.join(parts) + program = os.path.basename(sys.argv[0]) + fp = options.get_output_file() + fp.write(html.rstrip() + "\n") + if options.outputfile == "-": + sys.stderr.write("%s: %d index nodes\n" % (program, num_nodes)) + else: + print + print "%s: %d index nodes" % (program, num_nodes) + + +PLAT_DISCUSS = """ +<p> Some module names are followed by an annotation indicating what +platform they are available on.</p> + +""" + + +if __name__ == "__main__": + main() diff --git 
a/sys/src/cmd/python/Doc/tools/mkpkglist b/sys/src/cmd/python/Doc/tools/mkpkglist
new file mode 100755
index 000000000..1a1fd78f2
--- /dev/null
+++ b/sys/src/cmd/python/Doc/tools/mkpkglist
@@ -0,0 +1,85 @@
#! /usr/bin/env python
#
# Simple script to create the table that lists the packages available
# for download. This expects the downloadable files and the Makefile
# to be in the current directory.
#
# The output of this script can be pasted directly into the download
# page for the documentation.

import os
import sys

from os.path import isfile


# The documentation "content" rows of the table, in display order.
PKG_TYPES = [
    # human name, filename prefix
    ("HTML", "html"),
    ("PDF (US-Letter)", "pdf-letter"),
    ("PDF (A4)", "pdf-a4"),
    ("PostScript (US-Letter)", "postscript-letter"),
    ("PostScript (A4)", "postscript-a4"),
    ("GNU info", "info"),
    ("iSilo", "isilo"),
    ("LaTeX", "latex"),
    ]

# Ask the sibling "getversioninfo" script (run with the same interpreter)
# for the release number; it prints the version on its first line.
getversioninfo = os.path.join(os.path.dirname(__file__), "getversioninfo")
fp = os.popen('"%s" "%s"' % (sys.executable, getversioninfo), "r")
release = fp.readline().strip()
fp.close()

print '''\
<table border="1" cellpadding="3" align="center">
  <thead>
    <tr bgcolor="#99ccff"><th rowspan="2">Content</th>
        <th colspan="3">Format</th></tr>
    <tr bgcolor="#99ccff"><th>ZIP</th><th>GZip</th><th>BZip2</th></tr>
  </thead>
  <tbody>'''

# formatted using FILE_TEMPLATE % (release, prefix, release, extension, kb)
FILE_TEMPLATE = '''\
    <td><a href="../../ftp/python/doc/%s/%s-%s%s"
        >%dK</a></td>'''

NO_FILE_TEMPLATE = '''\
    <td>&nbsp;</td>'''

def get_size(prefix, ext):
    # Size in KB (rounded) of the archive for this prefix/extension;
    # assumes the file exists in the current directory.
    fn = "%s-%s%s" % (prefix, release, ext)
    return int(round(os.path.getsize(fn) / 1024.0))

def get_file_cell(prefix, ext, have):
    # One <td> cell: a download link annotated with the size when the
    # archive exists, otherwise a blank placeholder cell.
    if have:
        kb = get_size(prefix, ext)
        return FILE_TEMPLATE % (release, prefix, release, ext, kb)
    else:
        return NO_FILE_TEMPLATE

for name, prefix in PKG_TYPES:
    zip_fn = "%s-%s.zip" % (prefix, release)
    tgz_fn = "%s-%s.tgz" % (prefix, release)
    bz2_fn = "%s-%s.tar.bz2" % (prefix, release)

    have_zip = isfile(zip_fn)
    have_tgz = isfile(tgz_fn)
    have_bz2 = isfile(bz2_fn)

    have_some = have_zip or have_tgz or have_bz2

    # When no archive exists for this content type, still emit the row,
    # but wrapped in an HTML comment so it is easy to enable later.
    if not have_some:
        print " <!--"
    print " <tr><td>%s</td>" % name
    print get_file_cell(prefix, ".zip", have_zip)
    print get_file_cell(prefix, ".tgz", have_tgz)
    print get_file_cell(prefix, ".tar.bz2", have_bz2)
    print " </tr>"
    if not have_some:
        print " -->"

print '''\
  </tbody>
</table>
'''
diff --git a/sys/src/cmd/python/Doc/tools/mksourcepkg b/sys/src/cmd/python/Doc/tools/mksourcepkg
new file mode 100755
index 000000000..4b21f7747
--- /dev/null
+++ b/sys/src/cmd/python/Doc/tools/mksourcepkg
@@ -0,0 +1,164 @@
#! /usr/bin/env python
# -*- Python -*-

"""%(program)s - script to create the latex source distribution

usage:
    %(program)s [-t|--tools] release [tag]

with -t|--tools: doesn't include the documents, only the framework

without [tag]: generate from the current version that's checked in
               (*NOT* what's in the current directory!)

with [tag]: generate from the named tag
"""
#* should be modified to get the Python version number automatically
#  from the Makefile or someplace.
+ +import getopt +import glob +import os +import re +import shutil +import sys +import tempfile + +try: + __file__ +except NameError: + __file__ = sys.argv[0] + +tools = os.path.dirname(os.path.abspath(__file__)) +Doc = os.path.dirname(tools) +patchlevel_tex = os.path.join(Doc, "commontex", "patchlevel.tex") + +quiet = 0 +rx = re.compile(r":ext:(?:[a-zA-Z0-9]+@)?cvs\.([a-zA-Z0-9]+).sourceforge.net:" + r"/cvsroot/\1") + + +def main(): + global quiet + anonymous = False + try: + opts, args = getopt.getopt(sys.argv[1:], "Aabgtzq", + ["all", "bzip2", "gzip", "tools", "zip", + "quiet", "anonymous"]) + except getopt.error, e: + usage(warning=str(e)) + sys.exit(2) + if len(args) not in (1, 2): + usage(warning="wrong number of parameters") + sys.exit(2) + tools = 0 + formats = {} + for opt, arg in opts: + if opt in ("-t", "--tools"): + tools = 1 + elif opt in ("-q", "--quiet"): + quiet = quiet + 1 + elif opt in ("-b", "--bzip2"): + formats["bzip2"] = 1 + elif opt in ("-g", "--gzip"): + formats["gzip"] = 1 + elif opt in ("-z", "--zip"): + formats["zip"] = 1 + elif opt in ("-a", "--all"): + formats["bzip2"] = 1 + formats["gzip"] = 1 + formats["zip"] = 1 + elif opt in ("-A", "--anonymous"): + anonymous = True + if formats: + # make order human-predictable + formats = formats.keys() + formats.sort() + else: + formats = ["gzip"] + release = args[0] + svntag = None + if len(args) > 1: + svntag = args[1] + tempdir = tempfile.mktemp() + os.mkdir(tempdir) + pkgdir = os.path.join(tempdir, "Python-Docs-" + release) + pwd = os.getcwd() + mydir = os.path.abspath(os.path.dirname(sys.argv[0])) + os.chdir(tempdir) + if not quiet: + print "--- current directory is:", tempdir + if not svntag: + svntag = "trunk" + svnbase = "http://svn.python.org/projects/python" + run("svn export %s/%s/Doc Python-Docs-%s" + % (svnbase, svntag, release)) + + # Copy in the version informtation, if we're not just going to + # rip it back out: + if not tools: + if not os.path.exists(patchlevel_tex): + 
run(os.path.join(here, "getversioninfo")) + dest = os.path.join("Python-Docs-" + release, "commontex", + "patchlevel.tex") + shutil.copyfile(patchlevel_tex, dest) + + # Copy in the license file: + LICENSE = os.path.normpath( + os.path.join(mydir, os.pardir, os.pardir, "LICENSE")) + shutil.copyfile(LICENSE, "LICENSE") + if tools: + archive = "doctools-" + release + # we don't want the actual documents in this case: + for d in ("api", "dist", "doc", "ext", "inst", + "lib", "mac", "ref", "tut", "commontex"): + shutil.rmtree(os.path.join(pkgdir, d)) + else: + archive = "latex-" + release + + # XXX should also remove the .cvsignore files at this point + + os.chdir(tempdir) + archive = os.path.join(pwd, archive) + for format in formats: + if format == "bzip2": + run("tar cf - Python-Docs-%s | bzip2 -9 >%s.tar.bz2" + % (release, archive)) + elif format == "gzip": + run("tar cf - Python-Docs-%s | gzip -9 >%s.tgz" + % (release, archive)) + elif format == "zip": + if os.path.exists(archive + ".zip"): + os.unlink(archive + ".zip") + run("zip -q -r9 %s.zip Python-Docs-%s" + % (archive, release)) + + # clean up the work area: + os.chdir(pwd) + shutil.rmtree(tempdir) + + +def run(cmd): + if quiet < 2: + print "+++", cmd + if quiet: + cmd = "%s >/dev/null" % cmd + rc = os.system(cmd) + if rc: + sys.exit(rc) + + +def usage(warning=None): + stdout = sys.stdout + sys.stdout = sys.stderr + program = os.path.basename(sys.argv[0]) + try: + if warning: + print "%s: %s\n" % (program, warning) + print __doc__ % {"program": program} + finally: + sys.stdout = stdout + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/node2label.pl b/sys/src/cmd/python/Doc/tools/node2label.pl new file mode 100755 index 000000000..6491b2048 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/node2label.pl @@ -0,0 +1,71 @@ +#! /usr/bin/env perl + +# On Cygwin, we actually have to generate a temporary file when doing +# the inplace edit, or we'll get permission errors. 
Not sure whose
# bug this is, except that it isn't ours.  To deal with this, we
# generate backups during the edit phase and remove them at the end.
#
use English;
$INPLACE_EDIT = '.bak';

# Load the label -> node-file mappings produced by latex2html, then
# reverse them so a label can be looked up from a node file name.
require "labels.pl";

%nodes = ();
my $key;
# sort so that we get a consistent assignment for nodes with multiple labels
foreach $label (sort keys %external_labels) {
    #
    # If the label can't be used as a filename on non-Unix platforms,
    # skip it.  Such labels may be used internally within the documentation,
    # but will never be used for filename generation.
    #
    if ($label =~ /^([-.a-zA-Z0-9]+)$/) {
        $key = $external_labels{$label};
        $key =~ s|^/||;
        $nodes{$key} = $label;
    }
}

# This adds the "internal" labels added for indexing.  These labels will not
# be used for file names.
require "intlabels.pl";
foreach $label (keys %internal_labels) {
    $key = $internal_labels{$label};
    $key =~ s|^/||;
    if (defined($nodes{$key})) {
        $nodes{$label} = $nodes{$key};
    }
}

# collect labels that have been used
%newnames = ();

# Edit every input file in place: rewrite href targets pointing at
# node<NN>.html files to the friendlier <label>.html names, recording
# each rename we thereby commit to.
while (<>) {
    # don't want to do one s/// per line per node
    # so look for lines with hrefs, then do s/// on nodes present
    if (/(HREF|href)=[\"\']node\d+\.html[\#\"\']/) {
        @parts = split(/(HREF|href)\=[\"\']/);
        shift @parts;
        for $node (@parts) {
            # strip everything from the fragment/closing quote onward,
            # leaving just the bare node file name
            $node =~ s/[\#\"\'].*$//g;
            chomp($node);
            if (defined($nodes{$node})) {
                $label = $nodes{$node};
                if (s/(HREF|href)=([\"\'])$node([\#\"\'])/href=$2$label.html$3/g) {
                    s/(HREF|href)=([\"\'])$label.html/href=$2$label.html/g;
                    $newnames{$node} = "$label.html";
                }
            }
        }
    }
    print;
}

# Rename the node files themselves to match the rewritten links.
foreach $oldname (keys %newnames) {
    rename($oldname, $newnames{$oldname});
}

# Remove the backup files created by the in-place edit (see the Cygwin
# note at the top).
foreach $filename (glob('*.bak')) {
    unlink($filename);
}
diff --git a/sys/src/cmd/python/Doc/tools/prechm.py b/sys/src/cmd/python/Doc/tools/prechm.py
new file mode 100644
index 000000000..57a43fd6f
--- /dev/null
+++ b/sys/src/cmd/python/Doc/tools/prechm.py
@@ 
-0,0 +1,519 @@ +""" + Makes the necesary files to convert from plain html of + Python 1.5 and 1.5.x Documentation to + Microsoft HTML Help format version 1.1 + Doesn't change the html's docs. + + by hernan.foffani@iname.com + no copyright and no responsabilities. + + modified by Dale Nagata for Python 1.5.2 + + Renamed from make_chm.py to prechm.py, and checked into the Python + project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim + and Fred Drake. Obtained from Robin Dunn's .chm packaging of the + Python 2.2 docs, at <http://alldunn.com/python/>. +""" + +import sys +import os +from formatter import NullWriter, AbstractFormatter +from htmllib import HTMLParser +import getopt +import cgi + +usage_mode = ''' +Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename + -c: does not build filename.hhc (Table of Contents) + -k: does not build filename.hhk (Index) + -p: does not build filename.hhp (Project File) + -v 1.5[.x]: makes help for the python 1.5[.x] docs + (default is python 1.5.2 docs) +''' + +# Project file (*.hhp) template. 'arch' is the file basename (like +# the pythlp in pythlp.hhp); 'version' is the doc version number (like +# the 2.2 in Python 2.2). +# The magical numbers in the long line under [WINDOWS] set most of the +# user-visible features (visible buttons, tabs, etc). +# About 0x10384e: This defines the buttons in the help viewer. The +# following defns are taken from htmlhelp.h. Not all possibilities +# actually work, and not all those that work are available from the Help +# Workshop GUI. In particular, the Zoom/Font button works and is not +# available from the GUI. 
The ones we're using are marked with 'x': +# +# 0x000002 Hide/Show x +# 0x000004 Back x +# 0x000008 Forward x +# 0x000010 Stop +# 0x000020 Refresh +# 0x000040 Home x +# 0x000080 Forward +# 0x000100 Back +# 0x000200 Notes +# 0x000400 Contents +# 0x000800 Locate x +# 0x001000 Options x +# 0x002000 Print x +# 0x004000 Index +# 0x008000 Search +# 0x010000 History +# 0x020000 Favorites +# 0x040000 Jump 1 +# 0x080000 Jump 2 +# 0x100000 Zoom/Font x +# 0x200000 TOC Next +# 0x400000 TOC Prev + +project_template = ''' +[OPTIONS] +Compiled file=%(arch)s.chm +Contents file=%(arch)s.hhc +Default Window=%(arch)s +Default topic=index.html +Display compile progress=No +Full text search stop list file=%(arch)s.stp +Full-text search=Yes +Index file=%(arch)s.hhk +Language=0x409 +Title=Python %(version)s Documentation + +[WINDOWS] +%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\ +"index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0 + +[FILES] +''' + +contents_header = '''\ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<HTML> +<HEAD> +<meta name="GENERATOR" content="Microsoft® HTML Help Workshop 4.1"> +<!-- Sitemap 1.0 --> +</HEAD><BODY> +<OBJECT type="text/site properties"> + <param name="Window Styles" value="0x801227"> + <param name="ImageType" value="Folder"> +</OBJECT> +<UL> +''' + +contents_footer = '''\ +</UL></BODY></HTML> +''' + +object_sitemap = '''\ +<OBJECT type="text/sitemap"> + <param name="Name" value="%s"> + <param name="Local" value="%s"> +</OBJECT> +''' + +# List of words the full text search facility shouldn't index. This +# becomes file ARCH.stp. Note that this list must be pretty small! +# Different versions of the MS docs claim the file has a maximum size of +# 256 or 512 bytes (including \r\n at the end of each line). +# Note that "and", "or", "not" and "near" are operators in the search +# language, so no point indexing them even if we wanted to. 
+stop_list = ''' +a and are as at +be but by +for +if in into is it +near no not +of on or +such +that the their then there these they this to +was will with +''' + +# s is a string or None. If None or empty, return None. Else tack '.html' +# on to the end, unless it's already there. +def addhtml(s): + if s: + if not s.endswith('.html'): + s += '.html' + return s + +# Convenience class to hold info about "a book" in HTMLHelp terms == a doc +# directory in Python terms. +class Book: + def __init__(self, directory, title, firstpage, + contentpage=None, indexpage=None): + self.directory = directory + self.title = title + self.firstpage = addhtml(firstpage) + self.contentpage = addhtml(contentpage) + self.indexpage = addhtml(indexpage) + +# Library Doc list of books: +# each 'book' : (Dir, Title, First page, Content page, Index page) +supported_libraries = { + '2.5': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('whatsnew', "What's New", 'index', 'contents'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'), + ], + + '2.4': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('whatsnew', "What's New", 'index', 'contents'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and 
Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'), + ], + + '2.3': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('whatsnew', "What's New", 'index', 'contents'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index'), + ], + + '2.2': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('whatsnew', "What's New", 'index', 'contents'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index'), + ], + + '2.1.1': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + 
Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index'), + ], + + '2.0.0': + [ + Book('.', 'Global Module Index', 'modindex'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'contents'), + Book('dist','Distributing Python Modules', 'dist', 'contents'), + ], + + # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version: + # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99): + '1.5.2': + [ + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents') + ], + + # library for 1.5.1 version: + '1.5.1': + [ + Book('tut','Tutorial','tut','contents'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref-1','ref-2','ref-11'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex') + ], + + # library for 1.5 version: + '1.5': + [ + Book('tut','Tutorial','tut','node1'), + Book('lib','Library Reference','lib','node1','node268'), + Book('ref','Language Reference','ref-1','ref-2','ref-11'), + 
Book('ext','Extending and Embedding','ext','node1'), + Book('api','Python/C API','api','node1','node48') + ] +} + +# AlmostNullWriter doesn't print anything; it just arranges to save the +# text sent to send_flowing_data(). This is used to capture the text +# between an anchor begin/end pair, e.g. for TOC entries. + +class AlmostNullWriter(NullWriter): + + def __init__(self): + NullWriter.__init__(self) + self.saved_clear() + + def send_flowing_data(self, data): + stripped = data.strip() + if stripped: # don't bother to save runs of whitespace + self.saved.append(stripped) + + # Forget all saved text. + def saved_clear(self): + self.saved = [] + + # Return all saved text as a string. + def saved_get(self): + return ' '.join(self.saved) + +class HelpHtmlParser(HTMLParser): + + def __init__(self, formatter, path, output): + HTMLParser.__init__(self, formatter) + self.path = path # relative path + self.ft = output # output file + self.indent = 0 # number of tabs for pretty printing of files + self.proc = False # True when actively processing, else False + # (headers, footers, etc) + # XXX This shouldn't need to be a stack -- anchors shouldn't nest. + # XXX See SF bug <http://www.python.org/sf/546579>. + self.hrefstack = [] # stack of hrefs from anchor begins + + def begin_group(self): + self.indent += 1 + self.proc = True + + def finish_group(self): + self.indent -= 1 + # stop processing when back to top level + self.proc = self.indent > 0 + + def anchor_bgn(self, href, name, type): + if self.proc: + # XXX See SF bug <http://www.python.org/sf/546579>. + # XXX index.html for the 2.2.1 language reference manual contains + # XXX nested <a></a> tags in the entry for the section on blank + # XXX lines. We want to ignore the nested part completely. + if len(self.hrefstack) == 0: + self.saved_clear() + self.hrefstack.append(href) + + def anchor_end(self): + if self.proc: + # XXX See XXX above. 
+ if self.hrefstack: + title = cgi.escape(self.saved_get(), True) + path = self.path + '/' + self.hrefstack.pop() + self.tab(object_sitemap % (title, path)) + + def start_dl(self, atr_val): + self.begin_group() + + def end_dl(self): + self.finish_group() + + def do_dt(self, atr_val): + # no trailing newline on purpose! + self.tab("<LI>") + + # Write text to output file. + def write(self, text): + self.ft.write(text) + + # Write text to output file after indenting by self.indent tabs. + def tab(self, text=''): + self.write('\t' * self.indent) + if text: + self.write(text) + + # Forget all saved text. + def saved_clear(self): + self.formatter.writer.saved_clear() + + # Return all saved text as a string. + def saved_get(self): + return self.formatter.writer.saved_get() + +class IdxHlpHtmlParser(HelpHtmlParser): + # nothing special here, seems enough with parent class + pass + +class TocHlpHtmlParser(HelpHtmlParser): + + def start_dl(self, atr_val): + self.begin_group() + self.tab('<UL>\n') + + def end_dl(self): + self.finish_group() + self.tab('</UL>\n') + + def start_ul(self, atr_val): + self.begin_group() + self.tab('<UL>\n') + + def end_ul(self): + self.finish_group() + self.tab('</UL>\n') + + def do_li(self, atr_val): + # no trailing newline on purpose! 
+ self.tab("<LI>") + +def index(path, indexpage, output): + parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()), + path, output) + f = open(path + '/' + indexpage) + parser.feed(f.read()) + parser.close() + f.close() + +def content(path, contentpage, output): + parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()), + path, output) + f = open(path + '/' + contentpage) + parser.feed(f.read()) + parser.close() + f.close() + +def do_index(library, output): + output.write('<UL>\n') + for book in library: + print '\t', book.title, '-', book.indexpage + if book.indexpage: + index(book.directory, book.indexpage, output) + output.write('</UL>\n') + +def do_content(library, version, output): + output.write(contents_header) + for book in library: + print '\t', book.title, '-', book.firstpage + path = book.directory + "/" + book.firstpage + output.write('<LI>') + output.write(object_sitemap % (book.title, path)) + if book.contentpage: + content(book.directory, book.contentpage, output) + output.write(contents_footer) + +# Fill in the [FILES] section of the project (.hhp) file. +# 'library' is the list of directory description tuples from +# supported_libraries for the version of the docs getting generated. 
+def do_project(library, output, arch, version): + output.write(project_template % locals()) + pathseen = {} + for book in library: + directory = book.directory + path = directory + '\\%s\n' + for page in os.listdir(directory): + if page.endswith('.html') or page.endswith('.css'): + fullpath = path % page + if fullpath not in pathseen: + output.write(fullpath) + pathseen[fullpath] = True + +def openfile(file): + try: + p = open(file, "w") + except IOError, msg: + print file, ":", msg + sys.exit(1) + return p + +def usage(): + print usage_mode + sys.exit(0) + +def do_it(args = None): + if not args: + args = sys.argv[1:] + + if not args: + usage() + + try: + optlist, args = getopt.getopt(args, 'ckpv:') + except getopt.error, msg: + print msg + usage() + + if not args or len(args) > 1: + usage() + arch = args[0] + + version = None + for opt in optlist: + if opt[0] == '-v': + version = opt[1] + break + if not version: + usage() + + library = supported_libraries[version] + + if not (('-p','') in optlist): + fname = arch + '.stp' + f = openfile(fname) + print "Building stoplist", fname, "..." + words = stop_list.split() + words.sort() + for word in words: + print >> f, word + f.close() + + f = openfile(arch + '.hhp') + print "Building Project..." + do_project(library, f, arch, version) + if version == '2.0.0': + for image in os.listdir('icons'): + f.write('icons'+ '\\' + image + '\n') + + f.close() + + if not (('-c','') in optlist): + f = openfile(arch + '.hhc') + print "Building Table of Content..." + do_content(library, version, f) + f.close() + + if not (('-k','') in optlist): + f = openfile(arch + '.hhk') + print "Building Index..." + do_index(library, f) + f.close() + +if __name__ == '__main__': + do_it() diff --git a/sys/src/cmd/python/Doc/tools/push-docs.sh b/sys/src/cmd/python/Doc/tools/push-docs.sh new file mode 100755 index 000000000..28a4b3158 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/push-docs.sh @@ -0,0 +1,138 @@ +#! 
/bin/sh + +# Script to push docs from my development area to SourceForge, where the +# update-docs.sh script unpacks them into their final destination. + +TARGETHOST=www.python.org +TARGETDIR=/usr/home/fdrake/tmp + +PKGTYPE="bzip" # must be one of: bzip, tar, zip ("tar" implies gzip) + +TARGET="$TARGETHOST:$TARGETDIR" + +ADDRESSES='python-dev@python.org doc-sig@python.org python-list@python.org' + +TOOLDIR="`dirname $0`" +VERSION=`$TOOLDIR/getversioninfo` + +# Set $EXTRA to something non-empty if this is a non-trunk version: +EXTRA=`echo "$VERSION" | sed 's/^[0-9][0-9]*\.[0-9][0-9]*//'` + +if echo "$EXTRA" | grep -q '[.]' ; then + DOCLABEL="maintenance" + DOCTYPE="maint" +else + DOCLABEL="development" + DOCTYPE="devel" +fi + +DOCTYPE_SPECIFIED=false +EXPLANATION='' +ANNOUNCE=true + +getopt -T >/dev/null +if [ $? -eq 4 ] ; then + # We have a sufficiently useful getopt(1) implementation. + eval "set -- `getopt -ssh m:p:qt:F: \"$@\"`" +else + # This version of getopt doesn't support quoting of long options + # with spaces, so let's not rely on it at all. + : +fi + +while [ "$#" -gt 0 ] ; do + case "$1" in + -m) + EXPLANATION="$2" + shift 2 + ;; + -p) + PKGTYPE="$2" + shift 1 + ;; + -q) + ANNOUNCE=false + shift 1 + ;; + -t) + DOCTYPE="$2" + DOCTYPE_SPECIFIED=true + shift 2 + ;; + -F) + EXPLANATION="`cat $2`" + shift 2 + ;; + --) + shift 1 + break + ;; + -*) + echo "Unknown option: $1" >&2 + exit 2 + ;; + *) + break + ;; + esac +done +if [ "$1" ] ; then + if [ "$EXPLANATION" ] ; then + echo "Explanation may only be given once!" >&2 + exit 2 + fi + EXPLANATION="$1" + shift +fi + +START="`pwd`" +MYDIR="`dirname $0`" +cd "$MYDIR" +MYDIR="`pwd`" + +if [ "$PKGTYPE" = bzip ] ; then + PKGEXT=tar.bz2 +elif [ "$PKGTYPE" = tar ] ; then + PKGEXT=tgz +elif [ "$PKGTYPE" = zip ] ; then + PKGEXT=zip +else + echo 1>&2 "unsupported package type: $PKGTYPE" + exit 2 +fi + +# switch to .../Doc/ +cd .. 
+ +# If $DOCTYPE was not specified explicitly, look for .doctype in +# .../Doc/ and use the content of that file if present. +if $DOCTYPE_SPECIFIED ; then + : +elif [ -f .doctype ] ; then + DOCTYPE="`cat .doctype`" +fi + +make --no-print-directory ${PKGTYPE}html || exit $? +PACKAGE="html-$VERSION.$PKGEXT" +scp "$PACKAGE" tools/update-docs.sh $TARGET/ || exit $? +ssh "$TARGETHOST" tmp/update-docs.sh $DOCTYPE $PACKAGE '&&' rm tmp/update-docs.sh || exit $? + +if $ANNOUNCE ; then + sendmail $ADDRESSES <<EOF +To: $ADDRESSES +From: "Fred L. Drake" <fdrake@acm.org> +Subject: [$DOCLABEL doc updates] +X-No-Archive: yes + +The $DOCLABEL version of the documentation has been updated: + + http://$TARGETHOST/dev/doc/$DOCTYPE/ + +$EXPLANATION + +A downloadable package containing the HTML is also available: + + http://$TARGETHOST/dev/doc/python-docs-$DOCTYPE.$PKGEXT +EOF + exit $? +fi diff --git a/sys/src/cmd/python/Doc/tools/py2texi.el b/sys/src/cmd/python/Doc/tools/py2texi.el new file mode 100644 index 000000000..450c30c13 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/py2texi.el @@ -0,0 +1,970 @@ +;;; py2texi.el -- Conversion of Python LaTeX documentation to Texinfo + +;; Copyright (C) 2006 Jeroen Dekkers <jeroen@dekkers.cx> +;; Copyright (C) 1998, 1999, 2001, 2002 Milan Zamazal + +;; Author: Milan Zamazal <pdm@zamazal.org> +;; Version: $Id: py2texi.el 52974 2006-12-09 12:13:02Z matthias.klose $ +;; Keywords: python + +;; COPYRIGHT NOTICE +;; +;; This program is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 2, or (at your option) any later +;; version. +;; +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. 
+;; +;; You can find the GNU General Public License at +;; http://www.gnu.org/copyleft/gpl.html +;; or you can write to the Free Software Foundation, Inc., 59 Temple Place, +;; Suite 330, Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This is a Q&D hack for conversion of Python manuals to on-line help format. +;; I desperately needed usable online documenta for Python, so I wrote this. +;; The result code is ugly and need not contain complete information from +;; Python manuals. I apologize for my ignorance, especially ignorance to +;; python.sty. Improvements of this convertor are welcomed. + +;; How to use it: +;; Load this file and apply `M-x py2texi'. You will be asked for name of a +;; file to be converted. + +;; Where to find it: +;; New versions of this code might be found at +;; http://www.zamazal.org/software/python/py2texi/ . + +;;; Code: + + +(require 'texinfo) +(eval-when-compile + (require 'cl)) + + +(defvar py2texi-python-version "2.2" + "What to substitute for the \\version macro.") + +(defvar py2texi-python-short-version + (progn + (string-match "[0-9]+\\.[0-9]+" py2texi-python-version) + (match-string 0 py2texi-python-version)) + "Short version number, usually set by the LaTeX commands.") + +(defvar py2texi-texi-file-name nil + "If non-nil, that string is used as the name of the Texinfo file. +Otherwise a generated Texinfo file name is used.") + +(defvar py2texi-info-file-name nil + "If non-nil, that string is used as the name of the Info file. 
+Otherwise a generated Info file name is used.") + +(defvar py2texi-stop-on-problems nil + "*If non-nil, stop when you encouter soft problem.") + +(defconst py2texi-environments + '(("abstract" 0 "@quotation" "@end quotation\n") + ("center" 0 "" "") + ("cfuncdesc" 3 + (progn (setq findex t) + "\n@table @code\n@item \\1 \\2(\\3)\n@findex \\2\n") + "@end table\n") + ("cmemberdesc" 3 + "\n@table @code\n@item \\2 \\3\n" + "@end table\n") + ("classdesc" 2 + (progn (setq obindex t) + "\n@table @code\n@item \\1(\\2)\n@obindex \\1\n") + "@end table\n") + ("classdesc*" 1 + (progn (setq obindex t) + "\n@table @code\n@item \\1\n@obindex \\1\n") + "@end table\n") + ("comment" 0 "\n@ignore\n" "\n@end ignore\n") + ("csimplemacrodesc" 1 + (progn (setq cindex t) + "\n@table @code\n@item \\1\n@cindex \\1\n") + "@end table\n") + ("ctypedesc" 1 + (progn (setq cindex t) + "\n@table @code\n@item \\1\n@cindex \\1\n") + "@end table\n") + ("cvardesc" 2 + (progn (setq findex t) + "\n@table @code\n@item \\1 \\2\n@findex \\2\n") + "@end table\n") + ("datadesc" 1 + (progn (setq findex t) + "\n@table @code\n@item \\1\n@findex \\1\n") + "@end table\n") + ("datadescni" 1 "\n@table @code\n@item \\1\n" "@end table\n") + ("definitions" 0 "@table @dfn" "@end table\n") + ("description" 0 "@table @samp" "@end table\n") + ("displaymath" 0 "" "") + ("document" 0 + (concat "@defcodeindex mo\n" + "@defcodeindex ob\n" + "@titlepage\n" + (format "@title " title "\n") + (format "@author " author "\n") + "@page\n" + author-address + "@end titlepage\n" + "@node Top, , , (dir)\n") + (concat "@indices\n" + "@contents\n" + "@bye\n")) + ("enumerate" 0 "@enumerate" "@end enumerate") + ("envdesc" 2 (concat "\n@table @code" + "\n@item @backslash{}begin@{\\1@}\\2") + "@end table\n") + ("excdesc" 1 + (progn (setq obindex t) + "\n@table @code\n@item \\1\n@obindex \\1\n") + "@end table\n") + ("excclassdesc" 2 + (progn (setq obindex t) + "\n@table @code\n@item \\1(\\2)\n@obindex \\1\n") + "@end table\n") + ("flushleft" 0 
"" "") + ("fulllineitems" 0 "\n@table @code\n" "@end table\n") + ("funcdesc" 2 + (progn (setq findex t) + "\n@table @code\n@item \\1(\\2)\n@findex \\1\n") + "@end table\n") + ("funcdescni" 2 "\n@table @code\n@item \\1(\\2)\n" "@end table\n") + ("itemize" 0 "@itemize @bullet" "@end itemize\n") + ("list" 2 "\n@table @code\n" "@end table\n") + ("longtableii" 4 (concat "@multitable @columnfractions .5 .5\n" + "@item \\3 @tab \\4\n" + "@item ------- @tab ------ \n") + "@end multitable\n") + ("longtableiii" 5 (concat "@multitable @columnfractions .33 .33 .33\n" + "@item \\3 @tab \\4 @tab \\5\n" + "@item ------- @tab ------ @tab ------\n") + "@end multitable\n") + ("macrodesc" 2 (concat "\n@table @code" + "\n@item \\1@{\\2@}") + "@end table\n") + ("memberdesc" 1 + (progn (setq findex t) + "\n@table @code\n@item \\1\n@findex \\1\n") + "@end table\n") + ("memberdescni" 1 "\n@table @code\n@item \\1\n" "@end table\n") + ("methoddesc" 2 + (progn (setq findex t) + "\n@table @code\n@item \\1(\\2)\n@findex \\1\n") + "@end table\n") + ("methoddescni" 2 "\n@table @code\n@item \\1(\\2)\n" "@end table\n") + ("notice" 0 "@emph{Notice:} " "") + ("opcodedesc" 2 + (progn (setq findex t) + "\n@table @code\n@item \\1 \\2\n@findex \\1\n") + "@end table\n") + ("productionlist" 0 "\n@table @code\n" "@end table\n") + ("quotation" 0 "@quotation" "@end quotation") + ("quote" 0 "@quotation" "@end quotation") + ("seealso" 0 "See also:\n@table @emph\n" "@end table\n") + ("seealso*" 0 "@table @emph\n" "@end table\n") + ("sloppypar" 0 "" "") + ("small" 0 "" "") + ("tableii" 4 (concat "@multitable @columnfractions .5 .5\n" + "@item \\3 @tab \\4\n" + "@item ------- @tab ------ \n") + "@end multitable\n") + ("tableiii" 5 (concat "@multitable @columnfractions .33 .33 .33\n" + "@item \\3 @tab \\4 @tab \\5\n" + "@item ------- @tab ------ @tab ------\n") + "@end multitable\n") + ("tableiv" 6 (concat + "@multitable @columnfractions .25 .25 .25 .25\n" + "@item \\3 @tab \\4 @tab \\5 @tab \\6\n" + "@item 
------- @tab ------- @tab ------- @tab -------\n") + "@end multitable\n") + ("tablev" 7 (concat + "@multitable @columnfractions .20 .20 .20 .20 .20\n" + "@item \\3 @tab \\4 @tab \\5 @tab \\6 @tab \\7\n" + "@item ------- @tab ------- @tab ------- @tab ------- @tab -------\n") + "@end multitable\n") + ("alltt" 0 "@example" "@end example") + ) + "Associative list defining substitutions for environments. +Each list item is of the form (ENVIRONMENT ARGNUM BEGIN END) where: +- ENVIRONMENT is LaTeX environment name +- ARGNUM is number of (required) macro arguments +- BEGIN is substitution for \begin{ENVIRONMENT} +- END is substitution for \end{ENVIRONMENT} +Both BEGIN and END are evaled. Moreover, you can reference arguments through +\N regular expression notation in strings of BEGIN.") + +(defconst py2texi-commands + '(("AA" 0 "@AA{}") + ("aa" 0 "@aa{}") + ("ABC" 0 "ABC") + ("appendix" 0 (progn (setq appendix t) "")) + ("ASCII" 0 "ASCII") + ("author" 1 (progn (setq author (match-string 1 string)) "")) + ("authoraddress" 1 + (progn (setq author-address (match-string 1 string)) "")) + ("b" 1 "@w{\\1}") + ("backslash" 0 "@backslash{}") + ("bf" 0 "@destroy") + ("bifuncindex" 1 (progn (setq findex t) "@findex{\\1}")) + ("C" 0 "C") + ("c" 0 "@,") + ("catcode" 0 "") + ("cdata" 1 "@code{\\1}") + ("centerline" 1 "@center \\1") + ("cfuncline" 3 "@itemx \\1 \\2(\\3)\n@findex \\2") + ("cfunction" 1 "@code{\\1}") + ("chapter" 1 (format "@node \\1\n@%s \\1\n" + (if appendix "appendix" "chapter"))) + ("chapter*" 1 "@node \\1\n@unnumbered \\1\n") + ("character" 1 "@samp{\\1}") + ("citetitle" 1 "@ref{Top,,,\\1}") + ("class" 1 "@code{\\1}") + ("cmemberline" 3 "@itemx \\2 \\3\n") + ("code" 1 "@code{\\1}") + ("command" 1 "@command{\\1}") + ("constant" 1 "@code{\\1}") + ("copyright" 1 "@copyright{}") + ("Cpp" 0 "C++") + ("csimplemacro" 1 "@code{\\1}") + ("ctype" 1 "@code{\\1}") + ("dataline" 1 (progn (setq findex t) "@item \\1\n@findex \\1\n")) + ("date" 1 "\\1") + ("declaremodule" 2 (progn 
(setq cindex t) "@label{\\2}@cindex{\\2}")) + ("deprecated" 2 "@emph{This is deprecated in Python \\1. \\2}\n\n") + ("dfn" 1 "@dfn{\\1}") + ("documentclass" 1 py2texi-magic) + ("e" 0 "@backslash{}") + ("else" 0 (concat "@end ifinfo\n@" (setq last-if "iftex"))) + ("env" 1 "@code{\\1}") + ("EOF" 0 "@code{EOF}") + ("email" 1 "@email{\\1}") + ("em" 1 "@emph{\\1}") + ("emph" 1 "@emph{\\1}") + ("envvar" 1 "@env{\\1}") + ("exception" 1 "@code{\\1}") + ("exindex" 1 (progn (setq obindex t) "@obindex{\\1}")) + ("fi" 0 (if (equal last-if "ifx") "" (concat "@end " last-if))) + ("file" 1 "@file{\\1}") + ("filenq" 1 "@file{\\1}") + ("filevar" 1 "@file{@var{\\1}}") + ("footnote" 1 "@footnote{\\1}") + ("frac" 0 "") + ("funcline" 2 (progn (setq findex t) "@item \\1 \\2\n@findex \\1")) + ("funclineni" 2 "@item \\1 \\2") + ("function" 1 "@code{\\1}") + ("grammartoken" 1 "@code{\\1}") + ("guilabel" 1 "@strong{\\1}") + ("hline" 0 "") + ("ifx" 0 (progn (setq last-if "ifx") "")) + ("ifhtml" 0 (concat "@" (setq last-if "ifinfo"))) + ("iftexi" 0 (concat "@" (setq last-if "ifinfo"))) + ("index" 1 (progn (setq cindex t) "@cindex{\\1}")) + ("indexii" 2 (progn (setq cindex t) "@cindex{\\1 \\2}")) + ("indexiii" 3 (progn (setq cindex t) "@cindex{\\1 \\2 \\3}")) + ("indexiv" 3 (progn (setq cindex t) "@cindex{\\1 \\2 \\3 \\4}")) + ("infinity" 0 "@emph{infinity}") + ("it" 0 "@destroy") + ("kbd" 1 "@key{\\1}") + ("keyword" 1 "@code{\\1}") + ("kwindex" 1 (progn (setq cindex t) "@cindex{\\1}")) + ("label" 1 "@label{\\1}") + ("Large" 0 "") + ("LaTeX" 0 "La@TeX{}") + ("large" 0 "") + ("ldots" 0 "@dots{}") + ("leftline" 1 "\\1") + ("leq" 0 "<=") + ("lineii" 2 "@item \\1 @tab \\2") + ("lineiii" 3 "@item \\1 @tab \\2 @tab \\3") + ("lineiv" 4 "@item \\1 @tab \\2 @tab \\3 @tab \\4") + ("linev" 5 "@item \\1 @tab \\2 @tab \\3 @tab \\4 @tab \\5") + ("locallinewidth" 0 "") + ("localmoduletable" 0 "") + ("longprogramopt" 1 "@option{--\\1}") + ("macro" 1 "@code{@backslash{}\\1}") + ("mailheader" 1 "@code{\\1}") + 
("makeindex" 0 "") + ("makemodindex" 0 "") + ("maketitle" 0 (concat "@top " title "\n")) + ("makevar" 1 "@code{\\1}") + ("manpage" 2 "@samp{\\1(\\2)}") + ("mbox" 1 "@w{\\1}") + ("member" 1 "@code{\\1}") + ("memberline" 1 "@item \\1\n@findex \\1\n") + ("menuselection" 1 "@samp{\\1}") + ("method" 1 "@code{\\1}") + ("methodline" 2 (progn (setq moindex t) "@item \\1(\\2)\n@moindex \\1\n")) + ("methodlineni" 2 "@item \\1(\\2)\n") + ("mimetype" 1 "@samp{\\1}") + ("module" 1 "@samp{\\1}") + ("moduleauthor" 2 "") + ("modulesynopsis" 1 "\\1") + ("moreargs" 0 "@dots{}") + ("n" 0 "@backslash{}n") + ("newcommand" 2 "") + ("newlength" 1 "") + ("newsgroup" 1 "@samp{\\1}") + ("nodename" 1 + (save-excursion + (save-match-data + (re-search-backward "^@node ")) + (delete-region (point) (save-excursion (end-of-line) (point))) + (insert "@node " (match-string 1 string)) + "")) + ("noindent" 0 "@noindent ") + ("note" 1 "@emph{Note:} \\1") + ("NULL" 0 "@code{NULL}") + ("obindex" 1 (progn (setq obindex t) "@obindex{\\1}")) + ("opindex" 1 (progn (setq cindex t) "@cindex{\\1}")) + ("option" 1 "@option{\\1}") + ("optional" 1 "[\\1]") + ("paragraph" 1 "@subsubheading \\1") + ("pep" 1 (progn (setq cindex t) "PEP@ \\1@cindex PEP \\1\n")) + ("pi" 0 "pi") + ("platform" 1 "") + ("plusminus" 0 "+-") + ("POSIX" 0 "POSIX") + ("production" 2 "@item \\1 \\2") + ("productioncont" 1 "@item @w{} \\1") + ("program" 1 "@command{\\1}") + ("programopt" 1 "@option{\\1}") + ("protect" 0 "") + ("pytype" 1 "@code{\\1}") + ("ref" 1 "@ref{\\1}") + ("refbimodindex" 1 (progn (setq moindex t) "@moindex{\\1}")) + ("refmodindex" 1 (progn (setq moindex t) "@moindex{\\1}")) + ("refmodule" 1 "@samp{\\1}") + ("refstmodindex" 1 (progn (setq moindex t) "@moindex{\\1}")) + ("regexp" 1 "\"\\1\"") + ("release" 1 + (progn (setq py2texi-python-version (match-string 1 string)) "")) + ("renewcommand" 2 "") + ("rfc" 1 (progn (setq cindex t) "RFC@ \\1@cindex RFC \\1\n")) + ("rm" 0 "@destroy") + ("samp" 1 "@samp{\\1}") + ("section" 1 
(let ((str (match-string 1 string))) + (save-match-data + (if (string-match "\\(.*\\)[ \t\n]*---[ \t\n]*\\(.*\\)" + str) + (format + "@node %s\n@section %s\n" + (py2texi-backslash-quote (match-string 1 str)) + (py2texi-backslash-quote (match-string 2 str))) + "@node \\1\n@section \\1\n")))) + ("sectionauthor" 2 "") + ("seelink" 3 "\n@table @url\n@item @strong{\\1}\n(\\2)\n\\3\n@end table\n") + ("seemodule" 2 "@ref{\\1} \\2") + ("seepep" 3 "\n@table @strong\n@item PEP\\1 \\2\n\\3\n@end table\n") + ("seerfc" 3 "\n@table @strong\n@item RFC\\1 \\2\n\\3\n@end table\n") + ("seetext" 1 "\\1") + ("seetitle" 1 "@cite{\\1}") + ("seeurl" 2 "\n@table @url\n@item \\1\n\\2\n@end table\n") + ("setindexsubitem" 1 (progn (setq cindex t) "@cindex \\1")) + ("setlength" 2 "") + ("setreleaseinfo" 1 (progn (setq py2texi-releaseinfo ""))) + ("setshortversion" 1 + (progn (setq py2texi-python-short-version (match-string 1 string)) "")) + ("shortversion" 0 py2texi-python-short-version) + ("sqrt" 0 "") + ("stindex" 1 (progn (setq cindex t) "@cindex{\\1}")) + ("stmodindex" 1 (progn (setq moindex t) "@moindex{\\1}")) + ("strong" 1 "@strong{\\1}") + ("sub" 0 "/") + ("subsection" 1 "@node \\1\n@subsection \\1\n") + ("subsubsection" 1 "@node \\1\n@subsubsection \\1\n") + ("sum" 0 "") + ("tableofcontents" 0 "") + ("term" 1 "@item \\1") + ("TeX" 0 "@TeX{}") + ("textasciitilde" 0 "~") + ("textasciicircum" 0 "^") + ("textbackslash" 0 "@backslash{}") + ("textbar" 0 "|") + ("textbf" 1 "@strong{\\1}") + ("texteuro" 0 "@euro{}") + ; Unfortunately, this alternate spelling doesn't actually apply to + ; the usage found in Python Tutorial, which actually requires a + ; Euro symbol to make sense, so this is commented out as well. 
+ ; ("texteuro" 0 "Euro ") + ("textgreater" 0 ">") + ("textit" 1 "@i{\\1}") + ("textless" 0 "<") + ("textrm" 1 "\\1") + ("texttt" 1 "@code{\\1}") + ("textunderscore" 0 "_") + ("tilde" 0 "~") + ("title" 1 (progn (setq title (match-string 1 string)) "@settitle \\1")) + ("today" 0 "@today{}") + ("token" 1 "@code{\\1}") + ("tt" 0 "@destroy") + ("ttindex" 1 (progn (setq cindex t) "@cindex{\\1}")) + ("u" 0 "@backslash{}u") + ("ulink" 2 "\\1") + ("UNIX" 0 "UNIX") + ("undefined" 0 "") + ("unspecified" 0 "@dots{}") + ("url" 1 "@url{\\1}") + ("usepackage" 1 "") + ("var" 1 "@var{\\1}") + ("verbatiminput" 1 "@code{\\1}") + ("version" 0 py2texi-python-version) + ("versionadded" 1 "@emph{Added in Python version \\1}") + ("versionchanged" 1 "@emph{Changed in Python version \\1}") + ("vskip" 1 "") + ("vspace" 1 "") + ("warning" 1 "@emph{\\1}") + ("withsubitem" 2 "\\2") + ("XXX" 1 "@strong{\\1}")) + "Associative list of command substitutions. +Each list item is of the form (COMMAND ARGNUM SUBSTITUTION) where: +- COMMAND is LaTeX command name +- ARGNUM is number of (required) command arguments +- SUBSTITUTION substitution for the command. 
It is evaled and you can + reference command arguments through the \\N regexp notation in strings.") + +(defvar py2texi-magic "@documentclass\n" + "\"Magic\" string for auxiliary insertion at the beginning of document.") + +(defvar py2texi-dirs '("./" "../texinputs/") + "Where to search LaTeX input files.") + +(defvar py2texi-buffer "*py2texi*" + "The name of a buffer where Texinfo is generated.") + +(defconst py2texi-xemacs (string-match "^XEmacs" (emacs-version)) + "Running under XEmacs?") + + +(defmacro py2texi-search (regexp &rest body) + `(progn + (goto-char (point-min)) + (while (re-search-forward ,regexp nil t) + ,@body))) + +(defmacro py2texi-search-safe (regexp &rest body) + `(py2texi-search ,regexp + (unless (py2texi-protected) + ,@body))) + + +(defun py2texi-message (message) + "Report message and stop if `py2texi-stop-on-problems' is non-nil." + (if py2texi-stop-on-problems + (error message) + (message message))) + + +(defun py2texi-backslash-quote (string) + "Double backslahes in STRING." + (let ((i 0)) + (save-match-data + (while (setq i (string-match "\\\\" string i)) + (setq string (replace-match "\\\\\\\\" t nil string)) + (setq i (+ i 2)))) + string)) + + +(defun py2texi (file) + "Convert Python LaTeX documentation FILE to Texinfo." 
+ (interactive "fFile to convert: ") + (switch-to-buffer (get-buffer-create py2texi-buffer)) + (erase-buffer) + (insert-file file) + (let ((case-fold-search nil) + (title "") + (author "") + (author-address "") + (appendix nil) + (findex nil) + (obindex nil) + (cindex nil) + (moindex nil) + last-if) + (py2texi-process-verbatims) + (py2texi-process-comments) + (py2texi-process-includes) + (py2texi-process-funnyas) + (py2texi-process-environments) + (py2texi-process-commands) + (py2texi-fix-indentation) + (py2texi-fix-nodes) + (py2texi-fix-references) + (py2texi-fix-indices) + (py2texi-process-simple-commands) + (py2texi-fix-fonts) + (py2texi-fix-braces) + (py2texi-fix-backslashes) + (py2texi-destroy-empties) + (py2texi-fix-newlines) + (py2texi-adjust-level)) + (let* ((texi-file-name (or py2texi-texi-file-name + (py2texi-texi-file-name file))) + (info-file-name (or py2texi-info-file-name + (py2texi-info-file-name texi-file-name)))) + (goto-char (point-min)) + (when (looking-at py2texi-magic) + (delete-region (point) (progn (beginning-of-line 2) (point))) + (insert "\\input texinfo @c -*-texinfo-*-\n") + (insert "@setfilename " info-file-name)) + (when (re-search-forward "@chapter" nil t) + (texinfo-all-menus-update t)) + (goto-char (point-min)) + (write-file texi-file-name) + (message (format "You can apply `makeinfo %s' now." texi-file-name)))) + + +(defun py2texi-texi-file-name (filename) + "Generate name of Texinfo file from original file name FILENAME." + (concat filename + (if (string-match "\\.tex$" filename) "i" ".texi"))) + + +(defun py2texi-info-file-name (filename) + "Generate name of info file from original file name FILENAME." + (setq filename (expand-file-name filename)) + (let ((directory (file-name-directory filename)) + (basename (file-name-nondirectory filename))) + (concat directory "python-" + (substring basename 0 (- (length basename) 4)) "info"))) + + +(defun py2texi-process-verbatims () + "Process and protect verbatim environments." 
+ (let (delimiter + beg + end) + (py2texi-search-safe "\\\\begin{\\(verbatim\\|displaymath\\)}" + (when (save-excursion + ; Make sure we aren't looking at a commented out version + ; of a verbatim environment + (beginning-of-line) + (not (looking-at "%"))) + (replace-match "@example ") + (setq beg (copy-marker (point) nil)) + (re-search-forward "\\\\end{\\(verbatim\\|displaymath\\)}") + (setq end (copy-marker (match-beginning 0) nil)) + (replace-match "@end example") + (py2texi-texinfo-escape beg end) + (put-text-property (- beg (length "@example ")) + (+ end (length "@end example")) + 'py2texi-protected t))) + (py2texi-search-safe "\\\\verb\\([^a-z]\\)" + (setq delimiter (match-string 1)) + (replace-match "@code{") + (setq beg (copy-marker (point) nil)) + (re-search-forward (regexp-quote delimiter)) + (setq end (copy-marker (match-beginning 0) nil)) + (replace-match "}") + (put-text-property (- beg (length "@code{")) (+ end (length "}")) + 'py2texi-protected t) + (py2texi-texinfo-escape beg end)))) + + +(defun py2texi-process-comments () + "Remove comments." + (let (point) + (py2texi-search-safe "%" + (setq point (point)) + (when (save-excursion + (re-search-backward "\\(^\\|[^\\]\\(\\\\\\\\\\)*\\)%\\=" nil t)) + (delete-region (1- point) + (save-excursion (beginning-of-line 2) (point))))))) + + +(defun py2texi-process-includes () + "Include LaTeX input files. +Do not include .ind files." 
+ (let ((path (file-name-directory file)) + filename + dirs + includefile) + (py2texi-search-safe "\\\\input{\\([^}]+\\)}" + (setq filename (match-string 1)) + (unless (save-match-data (string-match "\\.tex$" filename)) + (setq filename (concat filename ".tex"))) + (setq includefile (save-match-data + (string-match "\\.ind\\.tex$" filename))) + (setq dirs py2texi-dirs) + (while (and (not includefile) dirs) + (setq includefile + (concat (file-name-as-directory (car dirs)) filename)) + (if (not (file-name-absolute-p includefile)) + (setq includefile + (concat (file-name-as-directory path) includefile))) + (unless (file-exists-p includefile) + (setq includefile nil) + (setq dirs (cdr dirs)))) + (if includefile + (save-restriction + (narrow-to-region (match-beginning 0) (match-end 0)) + (delete-region (point-min) (point-max)) + (when (stringp includefile) + (insert-file-contents includefile) + (goto-char (point-min)) + (insert "\n") + (py2texi-process-verbatims) + (py2texi-process-comments) + (py2texi-process-includes))) + (replace-match (format "\\\\emph{Included file %s}" filename)) + (py2texi-message (format "Input file %s not found" filename)))))) + + +(defun py2texi-process-funnyas () + "Convert @s." + (py2texi-search-safe "@" + (replace-match "@@"))) + + +(defun py2texi-process-environments () + "Process LaTeX environments." 
+ (let ((stack ()) + kind + environment + parameter + arguments + n + string + description) + (py2texi-search-safe (concat "\\\\\\(begin\\|end\\|item\\)" + "\\({\\([^}]*\\)}\\|[[]\\([^]]*\\)[]]\\|\\)") + (setq kind (match-string 1) + environment (match-string 3) + parameter (match-string 4)) + (replace-match "") + (cond + ((string= kind "begin") + (setq description (assoc environment py2texi-environments)) + (if description + (progn + (setq n (cadr description)) + (setq description (cddr description)) + (setq string (py2texi-tex-arguments n)) + (string-match (py2texi-regexp n) string) + ; incorrect but sufficient + (insert (replace-match (eval (car description)) + t nil string)) + (setq stack (cons (cadr description) stack))) + (py2texi-message (format "Unknown environment: %s" environment)) + (setq stack (cons "" stack)))) + ((string= kind "end") + (insert (eval (car stack))) + (setq stack (cdr stack))) + ((string= kind "item") + (insert "\n@item " (or parameter "") "\n")))) + (when stack + (py2texi-message (format "Unclosed environment: %s" (car stack)))))) + + +(defun py2texi-process-commands () + "Process LaTeX commands." + (let (done + command + command-info + string + n) + (while (not done) + (setq done t) + (py2texi-search-safe "\\\\\\([a-zA-Z*]+\\)\\(\\[[^]]*\\]\\)?" 
+ (setq command (match-string 1)) + (setq command-info (assoc command py2texi-commands)) + (if command-info + (progn + (setq done nil) + (replace-match "") + (setq command-info (cdr command-info)) + (setq n (car command-info)) + (setq string (py2texi-tex-arguments n)) + (string-match (py2texi-regexp n) string) + ; incorrect but sufficient + (insert (replace-match (eval (cadr command-info)) + t nil string))) + (py2texi-message (format "Unknown command: %s (not processed)" + command))))))) + + +(defun py2texi-argument-pattern (count) + (let ((filler "\\(?:[^{}]\\|\\\\{\\|\\\\}\\)*")) + (if (<= count 0) + filler + (concat filler "\\(?:{" + (py2texi-argument-pattern (1- count)) + "}" filler "\\)*" filler)))) +(defconst py2texi-tex-argument + (concat + "{\\(" + (py2texi-argument-pattern 10) ;really at least 10! + "\\)}[ \t%@c\n]*") + "Regexp describing LaTeX command argument including argument separators.") + + +(defun py2texi-regexp (n) + "Make regexp matching N LaTeX command arguments." + (if (= n 0) + "" + (let ((regexp "^[^{]*")) + (while (> n 0) + (setq regexp (concat regexp py2texi-tex-argument)) + (setq n (1- n))) + regexp))) + + +(defun py2texi-tex-arguments (n) + "Remove N LaTeX command arguments and return them as a string." + (let ((point (point)) + (i 0) + result + match) + (if (= n 0) + (progn + (when (re-search-forward "\\=\\({}\\| *\\)" nil t) + (replace-match "")) + "") + (while (> n 0) + (unless (re-search-forward + "\\(\\=\\|[^\\\\]\\)\\(\\\\\\\\\\)*\\([{}]\\)" nil t) + (debug)) + (if (string= (match-string 3) "{") + (setq i (1+ i)) + (setq i (1- i)) + (when (<= i 0) + (setq n (1- n))))) + (setq result (buffer-substring-no-properties point (point))) + (while (string-match "\n[ \t]*" result) + (setq result (replace-match " " t nil result))) + (delete-region point (point)) + result))) + + +(defun py2texi-process-simple-commands () + "Replace single character LaTeX commands." 
+ (let (char) + (py2texi-search-safe "\\\\\\([^a-z]\\)" + (setq char (match-string 1)) + (replace-match (format "%s%s" + (if (or (string= char "{") + (string= char "}") + (string= char " ")) + "@" + "") + (if (string= char "\\") + "\\\\" + char)))))) + + +(defun py2texi-fix-indentation () + "Remove white space at the beginning of lines." + (py2texi-search-safe "^[ \t]+" + (replace-match ""))) + + +(defun py2texi-fix-nodes () + "Remove unwanted characters from nodes and make nodes unique." + (let ((nodes (make-hash-table :test 'equal)) + id + counter + string + label + index) + (py2texi-search "^@node +\\(.*\\)$" + (setq string (match-string 1)) + (if py2texi-xemacs + (replace-match "@node " t) + (replace-match "" t nil nil 1)) + (while (string-match "@label{[^}]*}" string) + (setq label (match-string 0 string)) + (setq string (replace-match "" t nil string))) + (while (string-match "@..?index{[^}]*}" string) + (setq index (match-string 0 string)) + (setq string (replace-match "" t nil string))) + (while (string-match "@[a-zA-Z]+\\|[{}():]\\|``\\|''" string) + (setq string (replace-match "" t nil string))) + (while (string-match " -- " string) + (setq string (replace-match " - " t nil string))) + (while (string-match "\\." 
string) + (setq string (replace-match "" t nil string))) + (when (string-match " +$" string) + (setq string (replace-match "" t nil string))) + (when (string-match "^\\(Built-in\\|Standard\\) Module \\|The " string) + (setq string (replace-match "" t nil string))) + (string-match "^[^,]+" string) + (setq id (match-string 0 string)) + (setq counter (gethash id nodes)) + (if counter + (progn + (setq counter (1+ counter)) + (setq string (replace-match (format "\\& %d" counter) + t nil string))) + (setq counter 1)) + (setf (gethash id nodes) counter) + (insert string) + (beginning-of-line 3) + (when label + (insert label "\n")) + (when index + (insert index "\n"))))) + + +(defun py2texi-fix-references () + "Process labels and make references to point to appropriate nodes." + (let ((labels ()) + node) + (py2texi-search-safe "@label{\\([^}]*\\)}" + (setq node (save-excursion + (save-match-data + (and (re-search-backward "@node +\\([^,\n]+\\)" nil t) + (match-string 1))))) + (when node + (setq labels (cons (cons (match-string 1) node) labels))) + (replace-match "")) + (py2texi-search-safe "@ref{\\([^}]*\\)}" + (setq node (assoc (match-string 1) labels)) + (replace-match "") + (when node + (insert (format "@ref{%s}" (cdr node))))))) + + +(defun py2texi-fix-indices () + "Remove unwanted characters from @*index commands and create final indices." 
+ (py2texi-search-safe "@..?index\\>[^\n]*\\(\\)\n" + (replace-match "" t nil nil 1)) + (py2texi-search-safe "@..?index\\>[^\n]*\\(\\)" + (replace-match "\n" t nil nil 1)) + (py2texi-search-safe "@..?index\\({\\)\\([^}]+\\)\\(}+\\)" + (replace-match " " t nil nil 1) + (replace-match "" t nil nil 3) + (let ((string (match-string 2))) + (save-match-data + (while (string-match "@[a-z]+{" string) + (setq string (replace-match "" nil nil string))) + (while (string-match "{" string) + (setq string (replace-match "" nil nil string)))) + (replace-match string t t nil 2))) + (py2texi-search-safe "@..?index\\>.*\\([{}]\\|@[a-z]*\\)" + (replace-match "" t nil nil 1) + (goto-char (match-beginning 0))) + (py2texi-search-safe "[^\n]\\(\\)@..?index\\>" + (replace-match "\n" t nil nil 1)) + (goto-char (point-max)) + (re-search-backward "@indices") + (replace-match "") + (insert (if moindex + (concat "@node Module Index\n" + "@unnumbered Module Index\n" + "@printindex mo\n") + "") + (if obindex + (concat "@node Class-Exception-Object Index\n" + "@unnumbered Class, Exception, and Object Index\n" + "@printindex ob\n") + "") + (if findex + (concat "@node Function-Method-Variable Index\n" + "@unnumbered Function, Method, and Variable Index\n" + "@printindex fn\n") + "") + (if cindex + (concat "@node Miscellaneous Index\n" + "@unnumbered Miscellaneous Index\n" + "@printindex cp\n") + ""))) + + +(defun py2texi-fix-backslashes () + "Make backslashes from auxiliary commands." + (py2texi-search-safe "@backslash{}" + (replace-match "\\\\"))) + + +(defun py2texi-fix-fonts () + "Remove garbage after unstructured font commands." + (let (string) + (py2texi-search-safe "@destroy" + (replace-match "") + (when (eq (preceding-char) ?{) + (forward-char -1) + (setq string (py2texi-tex-arguments 1)) + (insert (substring string 1 (1- (length string)))))))) + + +(defun py2texi-fix-braces () + "Escape braces for Texinfo." 
+ (py2texi-search "{@{}" + (replace-match "@{")) + (py2texi-search "{@}}" + (replace-match "@}")) + (let (string) + (py2texi-search "{" + (unless (or (py2texi-protected) + (save-excursion + (re-search-backward + "@\\([a-zA-Z]*\\|multitable.*\\){\\=" nil t))) + (forward-char -1) + (setq string (py2texi-tex-arguments 1)) + (insert "@" (substring string 0 (1- (length string))) "@}"))))) + + +(defun py2texi-fix-newlines () + "Remove extra newlines." + (py2texi-search "\n\n\n+" + (replace-match "\n\n")) + (py2texi-search-safe "@item.*\n\n" + (delete-backward-char 1)) + (py2texi-search "@end example" + (unless (looking-at "\n\n") + (insert "\n")))) + + +(defun py2texi-destroy-empties () + "Remove all comments. +This avoids some makeinfo errors." + (py2texi-search "@c\\>" + (unless (eq (py2texi-protected) t) + (delete-region (- (point) 2) (save-excursion (end-of-line) (point))) + (cond + ((looking-at "\n\n") + (delete-char 1)) + ((save-excursion (re-search-backward "^[ \t]*\\=" nil t)) + (delete-region (save-excursion (beginning-of-line) (point)) + (1+ (point)))))))) + + +(defun py2texi-adjust-level () + "Increase heading level to @chapter, if needed. +This is only needed for distutils, so it has a very simple form only." + (goto-char (point-min)) + (unless (re-search-forward "@chapter\\>" nil t) + (py2texi-search-safe "@section\\>" + (replace-match "@chapter" t)) + (py2texi-search-safe "@\\(sub\\)\\(sub\\)?section\\>" + (replace-match "" nil nil nil 1)))) + + +(defun py2texi-texinfo-escape (beg end) + "Escape Texinfo special characters in region." + (save-excursion + (goto-char beg) + (while (re-search-forward "[@{}]" end t) + (replace-match "@\\&")))) + + +(defun py2texi-protected () + "Return protection status of the point before current point." 
+ (get-text-property (1- (point)) 'py2texi-protected)) + + +;;; Announce + +(provide 'py2texi) + + +;;; py2texi.el ends here diff --git a/sys/src/cmd/python/Doc/tools/refcounts.py b/sys/src/cmd/python/Doc/tools/refcounts.py new file mode 100644 index 000000000..31cfe909e --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/refcounts.py @@ -0,0 +1,98 @@ +"""Support functions for loading the reference count data file.""" +__version__ = '$Revision: 35267 $' + +import os +import sys + + +# Determine the expected location of the reference count file: +try: + p = os.path.dirname(__file__) +except NameError: + p = os.path.dirname(sys.argv[0]) +p = os.path.normpath(os.path.join(os.getcwd(), p, os.pardir, + "api", "refcounts.dat")) +DEFAULT_PATH = p +del p + + +def load(path=DEFAULT_PATH): + return loadfile(open(path)) + + +def loadfile(fp): + d = {} + while 1: + line = fp.readline() + if not line: + break + line = line.strip() + if line[:1] in ("", "#"): + # blank lines and comments + continue + parts = line.split(":", 4) + if len(parts) != 5: + raise ValueError("Not enough fields in %r" % line) + function, type, arg, refcount, comment = parts + if refcount == "null": + refcount = None + elif refcount: + refcount = int(refcount) + else: + refcount = None + # + # Get the entry, creating it if needed: + # + try: + entry = d[function] + except KeyError: + entry = d[function] = Entry(function) + # + # Update the entry with the new parameter or the result information. 
+ # + if arg: + entry.args.append((arg, type, refcount)) + else: + entry.result_type = type + entry.result_refs = refcount + return d + + +class Entry: + def __init__(self, name): + self.name = name + self.args = [] + self.result_type = '' + self.result_refs = None + + +def dump(d): + """Dump the data in the 'canonical' format, with functions in + sorted order.""" + items = d.items() + items.sort() + first = 1 + for k, entry in items: + if first: + first = 0 + else: + print + s = entry.name + ":%s:%s:%s:" + if entry.result_refs is None: + r = "" + else: + r = entry.result_refs + print s % (entry.result_type, "", r) + for t, n, r in entry.args: + if r is None: + r = "" + print s % (t, n, r) + + +def main(): + d = load() + dump(d) + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/rewrite.py b/sys/src/cmd/python/Doc/tools/rewrite.py new file mode 100644 index 000000000..1acdd9984 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/rewrite.py @@ -0,0 +1,54 @@ +"""Simple script to replace @DATE@ and friends with real information. + +Usage: rewrite.py boilerplate.tex [VAR=value] ... 
<template >output
+"""
+
+import sys
+import time
+
+
+def get_info(fp):
+    """Return a dict of replacement values taken from a boilerplate file.
+
+    Only the document date is extracted: the argument of the LaTeX
+    "date" macro, with "today" expanded to the current date.  It is
+    returned under the "DATE" key.
+    """
+    s = fp.read()
+
+    d = {}
+    start = s.find(r"\date{")
+    if start >= 0:
+        end = s.find("}", start)
+        date = s[start+6:end]
+        if date == r"\today":
+            date = time.strftime("%B %d, %Y", time.localtime(time.time()))
+        d["DATE"] = date
+    return d
+
+
+def main():
+    """Copy stdin to stdout, replacing @NAME@ markers with their values.
+
+    Values come from the boilerplate file named by sys.argv[1] plus any
+    NAME=value command-line arguments; "@@" collapses to a literal "@",
+    and unknown names are passed through unchanged.
+    """
+    s = sys.stdin.read()
+    if "@" in s:
+        # yes, we actually need to load the replacement values
+        d = get_info(open(sys.argv[1]))
+        for arg in sys.argv[2:]:
+            name, value = arg.split("=", 1)
+            d[name] = value
+        start = 0
+        while 1:
+            start = s.find("@", start)
+            if start < 0:
+                break
+            # NOTE(review): assumes a matching closing "@" exists; an
+            # unpaired "@" makes find() return -1 here.
+            end = s.find("@", start+1)
+            name = s[start+1:end]
+            if name:
+                value = d.get(name)
+                if value is None:
+                    # unknown name: leave the marker in place
+                    start = end + 1
+                else:
+                    s = s[:start] + value + s[end+1:]
+                    start = start + len(value)
+            else:
+                # "@@" --> "@"
+                s = s[:start] + s[end:]
+                start = end
+    sys.stdout.write(s)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/Makefile b/sys/src/cmd/python/Doc/tools/sgmlconv/Makefile
new file mode 100644
index 000000000..d222933e7
--- /dev/null
+++ b/sys/src/cmd/python/Doc/tools/sgmlconv/Makefile
@@ -0,0 +1,67 @@
+# Simple makefile to control XML generation for the entire document tree.
+# This should be used from the top-level directory (Doc/), not the directory
+# that actually contains this file:
+#
+# $ pwd
+# .../Doc
+# $ make -f tools/sgmlconv/Makefile
+
+TOPDIR=.
+TOOLSDIR=tools
+
+SGMLRULES=../$(TOOLSDIR)/sgmlconv/make.rules
+# The 'inst' and 'tut' directories break the conversion, so skip them for now.
+SUBDIRS=api dist ext lib mac ref
+SUBMAKE=$(MAKE) -f $(SGMLRULES) TOOLSDIR=../$(TOOLSDIR)
+
+all:	xml
+
+.PHONY: esis xml
+.PHONY: $(SUBDIRS)
+
+xml:
+	for DIR in $(SUBDIRS) ; do \
+		(cd $$DIR && $(SUBMAKE) xml) || exit $$? ; done
+
+esis:
+	for DIR in $(SUBDIRS) ; do \
+		(cd $$DIR && $(SUBMAKE) esis) || exit $$? ; done
+
+esis1:
+	for DIR in $(SUBDIRS) ; do \
+		(cd $$DIR && $(SUBMAKE) esis1) || exit $$?
; done + +tarball: xml + tar cf - tools/sgmlconv */*.xml | gzip -9 >xml-1.5.2b2.tgz + +api: + cd api && $(SUBMAKE) + +dist: + cd dist && $(SUBMAKE) + +ext: + cd ext && $(SUBMAKE) + +inst: + cd inst && $(SUBMAKE) + +lib: + cd lib && $(SUBMAKE) + +mac: + cd mac && $(SUBMAKE) + +ref: + cd ref && $(SUBMAKE) + +tut: + cd tut && $(SUBMAKE) + +clean: + for DIR in $(SUBDIRS) ; do \ + (cd $$DIR && $(SUBMAKE) clean) || exit $$? ; done + +clobber: + for DIR in $(SUBDIRS) ; do \ + (cd $$DIR && $(SUBMAKE) clobber) || exit $$? ; done diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/README b/sys/src/cmd/python/Doc/tools/sgmlconv/README new file mode 100644 index 000000000..02564eb5e --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/README @@ -0,0 +1,58 @@ +These scripts and Makefile fragment are used to convert the Python +documentation in LaTeX format to XML. + +This material is preliminary and incomplete. Python 2.0 is required. + +To convert all documents to XML: + + cd Doc/ + make -f tools/sgmlconv/Makefile + +To convert one document to XML: + + cd Doc/<document-dir> + make -f ../tools/sgmlconv/make.rules TOOLSDIR=../tools + +Please send comments and bug reports to docs@python.org. + + +What do the tools do? +--------------------- + +latex2esis.py + Reads in a conversion specification written in XML + (conversion.xml), reads a LaTeX document fragment, and interprets + the markup according to the specification. The output is a stream + of ESIS events like those created by the nsgmls SGML parser, but + is *not* guaranteed to represent a single tree! This is done to + allow conversion per entity rather than per document. Since many + of the LaTeX files for the Python documentation contain two + sections on closely related modules, it is important to allow both + of the resulting <section> elements to exist in the same output + stream. 
Additionally, since comments are not supported in ESIS, + comments are converted to <COMMENT> elements, which might exist at + the same level as the top-level content elements. + + The output of latex2esis.py gets saved as <filename>.esis1. + +docfixer.py + This is the really painful part of the conversion. Well, it's the + second really painful part, but more of the pain is specific to + the structure of the Python documentation and desired output + rather than to the parsing of LaTeX markup. + + This script loads the ESIS data created by latex2esis.py into a + DOM document *fragment* (remember, the latex2esis.py output may + not be well-formed). Once loaded, it walks over the tree many + times looking for a variety of possible specific + micro-conversions. Most of the code is not in any way "general". + After processing the fragment, a new ESIS data stream is written + out. Like the input, it may not represent a well-formed + document, but does represent a parsed entity. + + The output of docfixer.py is what gets saved in <filename>.esis. + +esis2sgml.py + Reads an ESIS stream and convert to SGML or XML. This also + converts <COMMENT> elements to real comments. This works quickly + because there's not much to actually do. diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/conversion.xml b/sys/src/cmd/python/Doc/tools/sgmlconv/conversion.xml new file mode 100644 index 000000000..f0151f437 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/conversion.xml @@ -0,0 +1,914 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<conversion> + <!-- Miscellaneous. 
--> + <macro name="declaremodule"> + <attribute name="id" optional="yes"/> + <attribute name="type"/> + <attribute name="name"/> + </macro> + <macro name="modulesynopsis"> + <content/> + </macro> + <macro name="platform"> + <content/> + </macro> + <macro name="deprecated"> + <attribute name="version"/> + <content/> + </macro> + <macro name="label"> + <attribute name="id"/> + </macro> + <macro name="nodename" outputname="label"> + <attribute name="id"/> + </macro> + <macro name="localmoduletable"/> + <macro name="manpage"> + <attribute name="name"/> + <attribute name="section"/> + </macro> + <macro name="module"> + <content/> + </macro> + <macro name="moduleauthor"> + <attribute name="name"/> + <attribute name="email"/> + </macro> + <macro name="citetitle"> + <attribute name="href" optional="yes"/> + <content/> + </macro> + <macro name="pep"> + <attribute name="num"/> + </macro> + <macro name="rfc"> + <attribute name="num"/> + </macro> + <macro name="sectionauthor" outputname="author"> + <attribute name="name"/> + <attribute name="email"/> + </macro> + <macro name="author"> + <attribute name="name"/> + </macro> + <macro name="authoraddress"> + <content/> + </macro> + <macro name="shortversion"/> + <macro name="note"> + <content/> + </macro> + <macro name="warning"> + <content/> + </macro> + <environment name="notice"> + <attribute name="role" optional="yes"/> + </environment> + + <macro name="menuselection"> + <content/> + </macro> + <macro name="sub"/> + + <!-- These are broken: we need to re-order the optional and required + parameters, making the optional parameter the content for the + element. latex2esis.py is not powerful enough to handle this. + --> + <macro name="versionadded"> + <attribute name="info" optional="yes"/> + <attribute name="version"/> + </macro> + <macro name="versionchanged"> + <attribute name="info" optional="yes"/> + <attribute name="version"/> + </macro> + + <!-- Module referencing. 
--> + <macro name="refmodule" outputname="module"> + <!-- this causes the optional parameter to \refmodule to be + discarded --> + <attribute name="" optional="yes"/> + <content/> + </macro> + + <!-- Information units. --> + <!-- C things. --> + <environment name="cfuncdesc"> + <attribute name="type"/> + <attribute name="name"/> + <child name="args"/> + </environment> + <environment name="csimplemacrodesc"> + <attribute name="name"/> + </environment> + <environment name="ctypedesc"> + <attribute name="tag" optional="yes"/> + <attribute name="name"/> + </environment> + <environment name="cvardesc"> + <attribute name="type"/> + <attribute name="name"/> + </environment> + + <!-- Python things. --> + <macro name="optional"> + <content/> + </macro> + <macro name="unspecified"/> + <macro name="moreargs"/> + <environment name="classdesc"> + <attribute name="name"/> + <child name="args"/> + </environment> + <environment name="classdesc*" outputname="classdesc"> + <attribute name="name"/> + </environment> + <environment name="datadesc"> + <attribute name="name"/> + </environment> + <environment name="datadescni" outputname="datadesc"> + <attribute name="index">no</attribute> + <attribute name="name"/> + </environment> + <macro name="dataline"> + <attribute name="name"/> + </macro> + <environment name="excclassdesc"> + <attribute name="name"/> + <child name="args"/> + </environment> + <environment name="excdesc"> + <attribute name="name"/> + </environment> + + <environment name="funcdesc"> + <attribute name="name"/> + <child name="args"/> + </environment> + <macro name="funcline"> + <attribute name="name"/> + <child name="args"/> + </macro> + <environment name="funcdescni" outputname="funcdesc"> + <attribute name="index">no</attribute> + <attribute name="name"/> + <child name="args"/> + </environment> + <macro name="funclineni" outputname="funcline"> + <attribute name="index">no</attribute> + <attribute name="name"/> + <child name="args"/> + </macro> + + <environment 
name="memberdesc"> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + </environment> + <environment name="memberdescni" outputname="memberdesc"> + <attribute name="index">no</attribute> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + </environment> + <macro name="memberline"> + <attribute name="name"/> + </macro> + + <environment name="methoddesc"> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + <child name="args"/> + </environment> + <macro name="methodline"> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + <child name="args"/> + </macro> + <environment name="methoddescni"> + <attribute name="index">no</attribute> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + <child name="args"/> + </environment> + <macro name="methodlineni" outputname="methodline"> + <attribute name="index">no</attribute> + <attribute name="class" optional="yes"/> + <attribute name="name"/> + <child name="args"/> + </macro> + + <environment name="opcodedesc"> + <attribute name="name"/> + <attribute name="var"/> + </environment> + + <!-- "See also:" sections. 
--> + <environment name="seealso*" outputname="seealso"> + <attribute name="sidebar">no</attribute> + </environment> + <macro name="seemodule"> + <!-- this causes the optional parameter to \seemodule to be + discarded --> + <attribute name="" optional="yes"/> + <attribute name="name"/> + <child name="description"/> + </macro> + <macro name="seepep"> + <attribute name="number"/> + <child name="title"/> + <child name="description"/> + </macro> + <macro name="seerfc"> + <attribute name="number"/> + <child name="title"/> + <child name="description"/> + </macro> + <macro name="seetext"> + <child name="description"/> + </macro> + <macro name="seetitle"> + <attribute name="href" optional="yes"/> + <child name="title"/> + <child name="description"/> + </macro> + <macro name="seeurl"> + <attribute name="href"/> + <child name="description"/> + </macro> + + <!-- Index-generating markup. --> + <macro name="index" outputname="indexterm"> + <attribute name="term1"/> + </macro> + <macro name="indexii" outputname="indexterm"> + <attribute name="term1"/> + <attribute name="term2"/> + </macro> + <macro name="indexiii" outputname="indexterm"> + <attribute name="term1"/> + <attribute name="term2"/> + <attribute name="term3"/> + </macro> + <macro name="indexiv" outputname="indexterm"> + <attribute name="term1"/> + <attribute name="term2"/> + <attribute name="term3"/> + <attribute name="term4"/> + </macro> + + <macro name="ttindex" outputname="indexterm"> + <attribute name="style">tt</attribute> + <attribute name="term1"/> + </macro> + + <macro name="refmodindex"> + <attribute name="module"/> + </macro> + <macro name="stmodindex"> + <attribute name="module"/> + </macro> + <macro name="refbimodindex" outputname="refmodindex"> + <attribute name="module"/> + </macro> + <macro name="refexmodindex" outputname="refmodindex"> + <attribute name="module"/> + </macro> + <macro name="refstmodindex" outputname="refmodindex"> + <attribute name="module"/> + </macro> + + <macro name="bifuncindex"> + 
<attribute name="name"/> + </macro> + <macro name="exindex"> + <attribute name="name"/> + </macro> + <macro name="obindex"> + <attribute name="name"/> + </macro> + <macro name="kwindex"> + <attribute name="name"/> + </macro> + <macro name="opindex"> + <attribute name="type"/> + </macro> + <macro name="stindex"> + <attribute name="type"/> + </macro> + <macro name="withsubitem"> + <attribute name="text"/> + <content/> + </macro> + <macro name="setindexsubitem"> + <attribute name="text"/> + </macro> + + <!-- Entity management. --> + <macro name="include" outputname="xi:include"> + <attribute name="href"/> + </macro> + <macro name="input" outputname="xi:include"> + <attribute name="href"/> + </macro> + + <!-- Large-scale document structure. --> + <macro name="documentclass"> + <attribute name="classname"/> + </macro> + + <macro name="usepackage"> + <attribute name="options" optional="yes"/> + <attribute name="pkg"/> + </macro> + + <environment name="document" + endcloses="chapter chapter* section section* + subsection subsection* + subsubsection subsubsection* + paragraph paragraph* subparagraph + subparagraph*"> + <attribute name="xmlns:xi" + >http://www.w3.org/2001/XInclude</attribute> + </environment> + + <macro name="chapter" + closes="chapter chapter* section section* subsection subsection* + subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + <macro name="chapter*" outputname="chapter" + closes="chapter chapter* section section* subsection subsection* + subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <attribute name="numbered">no</attribute> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + + <macro name="section" + closes="section section* subsection subsection* + subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + 
<content implied="yes"/> + </macro> + <macro name="section*" outputname="section" + closes="section section* subsection subsection* + subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <attribute name="numbered">no</attribute> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + + <macro name="subsection" + closes="subsection subsection* subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + <macro name="subsection*" outputname="subsection" + closes="subsection subsection* subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <attribute name="numbered">no</attribute> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + + <macro name="subsubsection" + closes="subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + <macro name="subsubsection*" outputname="subsubsection" + closes="subsubsection subsubsection* + paragraph paragraph* subparagraph subparagraph*"> + <attribute name="numbered">no</attribute> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + + <macro name="paragraph" + closes="paragraph paragraph* subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + <macro name="paragraph*" outputname="paragraph" + closes="paragraph paragraph* subparagraph subparagraph*"> + <attribute name="numbered">no</attribute> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + + <macro name="subparagraph" + closes="subparagraph subparagraph*"> + <text> +</text> + <child name="title"/> + <content implied="yes"/> + </macro> + <macro name="subparagraph*" outputname="subparagraph" + closes="subparagraph subparagraph*"> + <attribute 
name="numbered">no</attribute>
+    <text>
+</text>
+    <child name="title"/>
+    <content implied="yes"/>
+  </macro>
+  <macro name="title">
+    <content/>
+  </macro>
+
+  <macro name="appendix" outputname="back-matter"
+         closes="chapter chapter* section subsection subsubsection
+                 paragraph subparagraph"/>
+
+  <environment name="list"
+               endcloses="item">
+    <attribute name="bullet"/>
+    <attribute name="init"/>
+  </environment>
+  <macro name="item" closes="item">
+    <child name="leader" optional="yes"/>
+    <content implied="yes"/>
+  </macro>
+
+  <macro name="ref">
+    <attribute name="ref"/>
+  </macro>
+
+  <environment name="description" outputname="descriptionlist"
+               endcloses="item"/>
+
+  <environment name="enumerate" outputname="enumeration"
+               endcloses="item"/>
+
+  <environment name="fulllineitems"
+               endcloses="item"/>
+
+  <environment name="itemize"
+               endcloses="item"/>
+
+  <environment name="definitions" outputname="definitionlist"
+               endcloses="term"/>
+  <macro name="term" closes="definition">
+    <!-- not really optional, but uses the [] syntax -->
+    <child name="term" optional="yes"/>
+    <child name="definition" implied="yes"/>
+  </macro>
+
+  <environment name="alltt" outputname="verbatim"/>
+  <environment name="comment" verbatim="yes"/>
+  <environment name="verbatim" verbatim="yes"/>
+  <environment name="verbatim*" verbatim="yes">
+    <!-- not used anywhere, but it's a standard LaTeXism -->
+    <attribute name="spaces">visible</attribute>
+  </environment>
+  <macro name="verbatiminput" outputname="xi:include">
+    <attribute name="parse">text</attribute>
+    <attribute name="href"/>
+  </macro>
+
+  <!-- Table markup.
--> + <macro name="hline"/> + <environment name="tableii" outputname="table"> + <attribute name="cols">2</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <environment name="longtableii" outputname="table"> + <attribute name="cols">2</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <macro name="lineii" outputname="row"> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </macro> + + <environment name="tableiii" outputname="table"> + <attribute name="cols">3</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <environment name="longtableiii" outputname="table"> + <attribute name="cols">3</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <macro name="lineiii" outputname="row"> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </macro> + + <environment name="tableiv" outputname="table"> + <attribute name="cols">4</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <environment name="longtableiv" outputname="table"> + <attribute name="cols">4</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <macro name="lineiv" 
outputname="row"> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </macro> + + <environment name="tablev" outputname="table"> + <attribute name="cols">4</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <environment name="longtablev" outputname="table"> + <attribute name="cols">4</attribute> + <attribute name="colspec"/> + <attribute name="style"/> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </environment> + <macro name="linev" outputname="row"> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + <text> + </text> + <child name="entry"/> + </macro> + + <!-- These are handled at a later translation stage, at least for now. --> + <macro name="Cpp" outputname=""> + <text>C++</text> + </macro> + <macro name="geq" outputname=""> + <entityref name="geq"/> + </macro> + <macro name="infinity" outputname=""> + <entityref name="infin"/> + </macro> + <macro name="LaTeX" outputname=""> + <text>LaTeX</text> + </macro> + <macro name="ldots" outputname=""> + <text>...</text> + </macro> + <macro name="leq" outputname=""> + <entityref name="leq"/> + </macro> + <macro name="plusminus" outputname=""> + <entityref name="plusmn"/> + </macro> + <macro name="TeX" outputname=""> + <text>TeX</text> + </macro> + <macro name="version"/> + + <!-- Distutils things. 
--> + <macro name="command"> + <content/> + </macro> + <macro name="option"> + <content/> + </macro> + <macro name="filevar" outputname="var"> + <content/> + </macro> + <macro name="XXX" outputname="editorial-comment"> + <content/> + </macro> + + <!-- Grammar production lists --> + <environment name="productionlist"> + <attribute name="grammar" optional="yes"/> + </environment> + <macro name="production"> + <attribute name="token"/> + <content/> + </macro> + <macro name="productioncont"> + <content/> + </macro> + <macro name="token" outputname="grammartoken"> + <content/> + </macro> + <macro name="grammartoken"> + <content/> + </macro> + + <!-- Misc. --> + <macro name="emph"> + <content/> + </macro> + <macro name="strong"> + <content/> + </macro> + <macro name="textrm"> + <content/> + </macro> + <macro name="texttt"> + <content/> + </macro> + <macro name="code"> + <content/> + </macro> + <macro name="exception"> + <content/> + </macro> + <macro name="keyword"> + <content/> + </macro> + <macro name="samp"> + <content/> + </macro> + <macro name="class"> + <content/> + </macro> + <macro name="cdata"> + <content/> + </macro> + <macro name="cfunction"> + <content/> + </macro> + <macro name="csimplemacro"> + <content/> + </macro> + <macro name="ctype"> + <content/> + </macro> + <macro name="pytype"> + <content/> + </macro> + <macro name="character"> + <content/> + </macro> + <macro name="constant"> + <content/> + </macro> + <macro name="envvar" outputname="envar"> + <content/> + </macro> + <macro name="file" outputname="filename"> + <content/> + </macro> + <macro name="filenq" outputname="filename"> + <attribute name="quote">no</attribute> + <content/> + </macro> + <macro name="function"> + <content/> + </macro> + <macro name="kbd" outputname="keysym"> + <content/> + </macro> + <macro name="mailheader"> + <content/> + </macro> + <macro name="makevar"> + <content/> + </macro> + <macro name="method"> + <content/> + </macro> + <macro name="member"> + <content/> + </macro> + 
<macro name="mimetype"> + <content/> + </macro> + <macro name="newsgroup"> + <content/> + </macro> + <macro name="program" outputname="command"> + <content/> + </macro> + <macro name="programopt" outputname="option"> + <content/> + </macro> + <macro name="longprogramopt" outputname="longoption"> + <content/> + </macro> + <macro name="regexp"> + <content/> + </macro> + <macro name="var"> + <content/> + </macro> + <macro name="email"> + <content/> + </macro> + <macro name="ulink"> + <!-- order of the parameters makes this difficult; + we'll need to fix it up to <ulink href="...">...</ulink> + in docfixer.py. + --> + <child name="text"/> + <child name="href"/> + </macro> + <macro name="url"> + <content/> + </macro> + <macro name="footnote"> + <content/> + </macro> + <macro name="dfn" outputname="definedterm"> + <content/> + </macro> + + <macro name="mbox"> + <content/> + </macro> + + <!-- minimal math stuff to get by --> + <macro name="pi"/> + <macro name="sqrt"> + <content/> + </macro> + <macro name="frac" outputname="fraction"> + <child name="numerator"/> + <child name="denominator"/> + </macro> + <macro name="sum"> + <content/> + </macro> + + <macro name="leftline" outputname=""> + <content/> + </macro> + + <!-- Conversions to text; perhaps could be different? There's --> + <!-- no way for a style sheet to work with these this way. 
--> + <macro name="ABC" outputname=""> + <text>ABC</text> + </macro> + <macro name="ASCII" outputname=""> + <text>ASCII</text> + </macro> + <macro name="C" outputname=""> + <text>C</text> + </macro> + <macro name="EOF" outputname=""> + <text>EOF</text> + </macro> + <macro name="e" outputname=""> + <text>\</text> + </macro> + <macro name="NULL" outputname="constant"> + <text>NULL</text> + </macro> + <macro name="POSIX" outputname=""> + <text>POSIX</text> + </macro> + <macro name="UNIX" outputname=""> + <text>Unix</text> + </macro> + <macro name="textasciicircum" outputname=""> + <text>^</text> + </macro> + <macro name="textasciitilde" outputname=""> + <text>~</text> + </macro> + <macro name="textbackslash" outputname=""> + <text>\</text> + </macro> + <macro name="textbar" outputname=""> + <text>|</text> + </macro> + <macro name="textgreater" outputname=""> + <text>></text> + </macro> + <macro name="textless" outputname=""> + <text><</text> + </macro> + + <!-- These will end up disappearing as well! 
--> + <macro name="catcode" outputname=""/> + <macro name="fi" outputname=""/> + <macro name="ifhtml" outputname=""/> + <macro name="indexname" outputname=""/> + <macro name="labelwidth" outputname=""/> + <macro name="large" outputname=""/> + <macro name="leftmargin" outputname=""/> + <macro name="makeindex" outputname=""/> + <macro name="makemodindex" outputname=""/> + <macro name="maketitle" outputname=""/> + <macro name="noindent" outputname=""/> + <macro name="protect" outputname=""/> + <macro name="textwidth"/> + <macro name="renewcommand"> + <attribute name="macro"/> + <attribute name="nargs" optional="yes"/> + <content/> + </macro> + <macro name="tableofcontents" outputname=""/> + <macro name="vspace"> + <attribute name="size"/> + </macro> +</conversion> diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/docfixer.py b/sys/src/cmd/python/Doc/tools/sgmlconv/docfixer.py new file mode 100755 index 000000000..81519ee58 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/docfixer.py @@ -0,0 +1,1073 @@ +#! /usr/bin/env python + +"""Perform massive transformations on a document tree created from the LaTeX +of the Python documentation, and dump the ESIS data for the transformed tree. +""" + + +import errno +import esistools +import re +import sys +import xml.dom +import xml.dom.minidom + +ELEMENT = xml.dom.Node.ELEMENT_NODE +ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE +TEXT = xml.dom.Node.TEXT_NODE + + +class ConversionError(Exception): + pass + + +ewrite = sys.stderr.write +try: + # We can only do this trick on Unix (if tput is on $PATH)! 
+    if sys.platform != "posix" or not sys.stderr.isatty():
+        raise ImportError
+    import commands
+except ImportError:
+    bwrite = ewrite
+else:
+    def bwrite(s, BOLDON=commands.getoutput("tput bold"),
+               BOLDOFF=commands.getoutput("tput sgr0")):
+        # Bold variant of ewrite(); the terminal escape sequences are
+        # captured once, at function-definition time.
+        ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
+
+
+PARA_ELEMENT = "para"
+
+DEBUG_PARA_FIXER = 0
+
+if DEBUG_PARA_FIXER:
+    def para_msg(s):
+        ewrite("*** %s\n" % s)
+else:
+    def para_msg(s):
+        # debugging disabled: swallow paragraph-fixer messages
+        pass
+
+
+def get_first_element(doc, gi):
+    """Return the first child of *doc* named *gi*, or None."""
+    for n in doc.childNodes:
+        if n.nodeName == gi:
+            return n
+
+def extract_first_element(doc, gi):
+    """Remove and return the first child of *doc* named *gi*, or None."""
+    node = get_first_element(doc, gi)
+    if node is not None:
+        doc.removeChild(node)
+    return node
+
+
+def get_documentElement(node):
+    """Return the document element of the fragment *node*.
+
+    NOTE(review): despite the name, this returns the *last* element
+    child of *node*; for the fragments handled here that appears to be
+    the document element -- confirm against callers.
+    """
+    result = None
+    for child in node.childNodes:
+        if child.nodeType == ELEMENT:
+            result = child
+    return result
+
+
+def set_tagName(elem, gi):
+    """Rename element *elem* to *gi* (updates both nodeName and tagName)."""
+    elem.nodeName = elem.tagName = gi
+
+
+def find_all_elements(doc, gi):
+    """Return *doc* (if named *gi*) plus all descendant elements named *gi*."""
+    nodes = []
+    if doc.nodeName == gi:
+        nodes.append(doc)
+    for child in doc.childNodes:
+        if child.nodeType == ELEMENT:
+            if child.tagName == gi:
+                nodes.append(child)
+            for node in child.getElementsByTagName(gi):
+                nodes.append(node)
+    return nodes
+
+def find_all_child_elements(doc, gi):
+    """Return only the direct children of *doc* named *gi*."""
+    nodes = []
+    for child in doc.childNodes:
+        if child.nodeName == gi:
+            nodes.append(child)
+    return nodes
+
+
+def find_all_elements_from_set(doc, gi_set):
+    """Return *doc* and all descendants whose name is in *gi_set*."""
+    return __find_all_elements_from_set(doc, gi_set, [])
+
+def __find_all_elements_from_set(doc, gi_set, nodes):
+    # Recursive worker for find_all_elements_from_set(); accumulates
+    # matches into *nodes* in document order.
+    if doc.nodeName in gi_set:
+        nodes.append(doc)
+    for child in doc.childNodes:
+        if child.nodeType == ELEMENT:
+            __find_all_elements_from_set(child, gi_set, nodes)
+    return nodes
+
+
+def simplify(doc, fragment):
+    # Try to rationalize the document a bit, since these things are simply
+    # not valid SGML/XML documents as they stand, and need a little work.
+ documentclass = "document" + inputs = [] + node = extract_first_element(fragment, "documentclass") + if node is not None: + documentclass = node.getAttribute("classname") + node = extract_first_element(fragment, "title") + if node is not None: + inputs.append(node) + # update the name of the root element + node = get_first_element(fragment, "document") + if node is not None: + set_tagName(node, documentclass) + # Move everything that comes before this node into this node; + # this will be the document element. + nodelist = fragment.childNodes + point = node.firstChild + while not nodelist[0].isSameNode(node): + node.insertBefore(nodelist[0], point) + while 1: + node = extract_first_element(fragment, "input") + if node is None: + break + inputs.append(node) + if inputs: + docelem = get_documentElement(fragment) + inputs.reverse() + for node in inputs: + text = doc.createTextNode("\n") + docelem.insertBefore(text, docelem.firstChild) + docelem.insertBefore(node, text) + docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) + while fragment.firstChild and fragment.firstChild.nodeType == TEXT: + fragment.removeChild(fragment.firstChild) + + +def cleanup_root_text(doc): + discards = [] + skip = 0 + for n in doc.childNodes: + prevskip = skip + skip = 0 + if n.nodeType == TEXT and not prevskip: + discards.append(n) + elif n.nodeName == "COMMENT": + skip = 1 + for node in discards: + doc.removeChild(node) + + +DESCRIPTOR_ELEMENTS = ( + "cfuncdesc", "cvardesc", "ctypedesc", + "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni", + "excdesc", "funcdesc", "funcdescni", "opcodedesc", + "datadesc", "datadescni", + ) + +def fixup_descriptors(doc, fragment): + sections = find_all_elements(fragment, "section") + for section in sections: + find_and_fix_descriptors(doc, section) + + +def find_and_fix_descriptors(doc, container): + children = container.childNodes + for child in children: + if child.nodeType == ELEMENT: + tagName = child.tagName + if 
tagName in DESCRIPTOR_ELEMENTS: + rewrite_descriptor(doc, child) + elif tagName == "subsection": + find_and_fix_descriptors(doc, child) + + +def rewrite_descriptor(doc, descriptor): + # + # Do these things: + # 1. Add an "index='no'" attribute to the element if the tagName + # ends in 'ni', removing the 'ni' from the name. + # 2. Create a <signature> from the name attribute + # 2a.Create an <args> if it appears to be available. + # 3. Create additional <signature>s from <*line{,ni}> elements, + # if found. + # 4. If a <versionadded> is found, move it to an attribute on the + # descriptor. + # 5. Move remaining child nodes to a <description> element. + # 6. Put it back together. + # + # 1. + descname = descriptor.tagName + index = descriptor.getAttribute("name") != "no" + desctype = descname[:-4] # remove 'desc' + linename = desctype + "line" + if not index: + linename = linename + "ni" + # 2. + signature = doc.createElement("signature") + name = doc.createElement("name") + signature.appendChild(doc.createTextNode("\n ")) + signature.appendChild(name) + name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) + descriptor.removeAttribute("name") + # 2a. 
def rewrite_descriptor(doc, descriptor):
    """Rewrite a *desc element into <signature>/<description> structure.

    Steps:
      1. Add an "index='no'" attribute to the element if the tagName
         ends in 'ni', removing the 'ni' from the name.
      2. Create a <signature> from the name attribute.
      2a. Create an <args> if it appears to be available.
      3. Create additional <signature>s from <*line{,ni}> elements,
         if found.
      4. If a <versionadded> is found, move it to an attribute on the
         descriptor.
      5. Move remaining child nodes to a <description> element.
      6. Put it back together.

    NOTE(review): indentation amounts inside generated whitespace text
    nodes were reconstructed; the original spacing was lost in
    transcription.
    """
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname.endswith("ni"):
        # BUG FIX: the 'ni' suffix marks the non-indexing variant.  The
        # transcribed code tested getAttribute("name") != "no", which never
        # stripped the suffix, so desctype/linename below were wrong for
        # every *ni descriptor listed in DESCRIPTOR_ELEMENTS.
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4]  # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n    "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError(
                "got 'var' attribute on descriptor other than opcodedesc")
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n    "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n    "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n  "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                # py3-parseable print call (behavior identical in py2 for
                # a single argument)
                print(oldchild.toxml())
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = last.data.rstrip() + "\n  "
    # 6.
    # should have nothing but whitespace and signature lines in
    # <descriptor>; discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n  "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))


def methodline_to_signature(doc, methodline):
    """Convert a supplemental <*line name=...> element into a <signature>."""
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n    "))
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature.appendChild(name)
    if len(methodline.childNodes):
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n    "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n  "))
    return signature


def move_children(origin, dest, start=0):
    """Reparent origin.childNodes[start:] onto the end of *dest*.

    Nodes are moved in document order; siblings before *start* stay
    attached to *origin*.
    """
    children = origin.childNodes
    while start < len(children):
        node = children[start]
        origin.removeChild(node)
        dest.appendChild(node)


def handle_appendix(doc, fragment):
    """Collect everything after the first <appendix> into <back-matter>.

    Must be called after simplify() if the document is multi-rooted to
    begin with.
    """
    docelem = get_documentElement(fragment)
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # explicit loops instead of py2's eager map() used for side effects
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == TEXT \
              and not nodes[0].data.strip():
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
def handle_labels(doc, fragment):
    """Convert <label id=.../> markers into id attributes.

    The id lands on the label's parent, except when the parent is a
    <title>, in which case it belongs to the title's own parent (the
    sectioning element).
    """
    for label in find_all_elements(fragment, "label"):
        id = label.getAttribute("id")
        if not id:
            continue
        parent = label.parentNode
        parentTagName = parent.tagName
        if parentTagName == "title":
            parent.parentNode.setAttribute("id", id)
        else:
            parent.setAttribute("id", id)
        # now, remove <label id="..."/> from parent:
        parent.removeChild(label)
        if parentTagName == "title":
            parent.normalize()
            children = parent.childNodes
            if children[-1].nodeType == TEXT:
                children[-1].data = children[-1].data.rstrip()


def fixup_trailing_whitespace(doc, fragment, wsmap):
    """Normalize whitespace around the close of elements listed in wsmap.

    wsmap maps an element name to a (before-end-tag, after-end-tag) pair
    of whitespace strings.  Elements are processed from the inside out so
    nested adjustments do not disturb each other.
    """
    queue = [fragment]
    fixups = []
    while queue:
        node = queue.pop(0)
        if node.nodeName in wsmap:   # was wsmap.has_key() (py2-only)
            fixups.append(node)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                queue.append(child)
    # reverse the list to process from the inside out
    fixups.reverse()
    for node in fixups:
        node.parentNode.normalize()
        lastchild = node.lastChild
        before, after = wsmap[node.tagName]
        if lastchild.nodeType == TEXT:
            lastchild.data = lastchild.data.rstrip() + before
        # (dead local 'norm = 0' removed; it was never read)
        if wsmap[node.tagName]:
            nextnode = node.nextSibling
            if nextnode and nextnode.nodeType == TEXT:
                nextnode.data = after + nextnode.data.lstrip()
            else:
                wsnode = doc.createTextNode(after)
                node.parentNode.insertBefore(wsnode, nextnode)
        # hack to get the title in place:
        if node.tagName == "title" \
           and node.parentNode.firstChild.nodeType == ELEMENT:
            node.parentNode.insertBefore(doc.createTextNode("\n  "),
                                         node.parentNode.firstChild)
            node.parentNode.normalize()


def normalize(doc):
    """Merge adjacent text nodes in every top-level element."""
    for node in doc.childNodes:
        if node.nodeType == ELEMENT:
            node.normalize()


def cleanup_trailing_parens(doc, element_names):
    """Strip a trailing '()' from the text content of the named elements."""
    wanted = {}
    for gi in element_names:
        wanted[gi] = gi
    queue = [node for node in doc.childNodes if node.nodeType == ELEMENT]
    while queue:
        node = queue.pop(0)
        if node.tagName in wanted:
            lastchild = node.lastChild
            if lastchild and lastchild.nodeType == TEXT:
                data = lastchild.data
                if data.endswith("()"):
                    lastchild.data = data[:-2]
        else:
            for child in node.childNodes:
                if child.nodeType == ELEMENT:
                    queue.append(child)


def contents_match(left, right):
    """Return 1 if two nodes have recursively equivalent content."""
    left_children = left.childNodes
    right_children = right.childNodes
    if len(left_children) != len(right_children):
        return 0
    # Lengths are equal, so zip() pairs every child.  (The original used
    # py2-only map(None, ...), which behaved identically here.)
    for l, r in zip(left_children, right_children):
        nodeType = l.nodeType
        if nodeType != r.nodeType:
            return 0
        if nodeType == ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif nodeType == TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
def create_module_info(doc, section):
    """Build a <moduleinfo> element from the module markup in *section*.

    Collects <modulesynopsis>, <moduleauthor>, <platform>,
    <declaremodule> and <versionadded>, morphing the section <title>
    into a <short-synopsis> when it matches the declared module name.

    NOTE(review): whitespace amounts in generated text nodes were
    reconstructed; the original spacing was lost in transcription.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    set_tagName(node, "synopsis")
    lastchild = node.childNodes[-1]
    if lastchild.nodeType == TEXT and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n    "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n    "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute(
                "added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = first_data.data[4:].lstrip()
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                section.removeChild(section.childNodes[0])
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n    "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n    "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n  "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n  "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements.  A lot of code for minimal value, but keeps
        # the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeName == "moduleinfo":
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    s = data.lstrip()
                    if len(s) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + s


def cleanup_synopses(doc, fragment):
    """Run create_module_info() over every <section>."""
    for node in find_all_elements(fragment, "section"):
        create_module_info(doc, node)


def fixup_table_structures(doc, fragment):
    """Restructure every <table> into thead/tbody/tgroup form."""
    for table in find_all_elements(fragment, "table"):
        fixup_table(doc, table)


def fixup_table(doc, table):
    """Rebuild one <table>: header <entry>s into <thead>, <row>s into
    <tbody>, <hline>s folded into rowsep attributes, all wrapped in a
    single <tgroup>."""
    # create the table head
    thead = doc.createElement("thead")
    headrow = doc.createElement("row")
    move_elements_by_name(doc, table, headrow, "entry")
    thead.appendChild(doc.createTextNode("\n    "))
    thead.appendChild(headrow)
    thead.appendChild(doc.createTextNode("\n    "))
    # create the table body
    tbody = doc.createElement("tbody")
    prev_row = None
    children = table.childNodes
    for child in children:
        if child.nodeType == ELEMENT:
            gi = child.tagName
            if gi == "hline" and prev_row is not None:
                prev_row.setAttribute("rowsep", "1")
            elif gi == "row":
                prev_row = child
    # save the rows:
    tbody.appendChild(doc.createTextNode("\n    "))
    move_elements_by_name(doc, table, tbody, "row", sep="\n    ")
    # and toss the rest:
    while children:
        child = children[0]
        nodeType = child.nodeType
        if nodeType == TEXT:
            if child.data.strip():
                raise ConversionError("unexpected free data in <%s>: %r"
                                      % (table.tagName, child.data))
            table.removeChild(child)
            continue
        if nodeType == ELEMENT:
            if child.tagName != "hline":
                raise ConversionError(
                    "unexpected <%s> in table" % child.tagName)
            table.removeChild(child)
            continue
        raise ConversionError(
            "unexpected %s node in table" % child.__class__.__name__)
    # nothing left in the <table>; add the <thead> and <tbody>
    tgroup = doc.createElement("tgroup")
    tgroup.appendChild(doc.createTextNode("\n    "))
    tgroup.appendChild(thead)
    tgroup.appendChild(doc.createTextNode("\n    "))
    tgroup.appendChild(tbody)
    tgroup.appendChild(doc.createTextNode("\n    "))
    table.appendChild(tgroup)
    # now make the <entry>s look nice:
    for row in table.getElementsByTagName("row"):
        fixup_row(doc, row)


def fixup_row(doc, row):
    """Put each <entry> after the first onto its own indented line."""
    for entry in list(row.childNodes[1:]):
        row.insertBefore(doc.createTextNode("\n    "), entry)


def move_elements_by_name(doc, source, dest, name, sep=None):
    """Move every child of *source* named *name* to the end of *dest*,
    optionally following each moved node with a *sep* text node."""
    moving = []
    for child in source.childNodes:
        if child.nodeName == name:
            moving.append(child)
    for node in moving:
        source.removeChild(node)
        dest.appendChild(node)
        if sep:
            dest.appendChild(doc.createTextNode(sep))


# Elements whose content is recursively scanned for paragraph building.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter", "abstract", "enumerate",
    "section", "subsection", "subsubsection",
    "paragraph", "subparagraph", "back-matter",
    "howto", "manual",
    "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
    "definitionlist", "definition",
    )

# Elements that terminate paragraph material.  (NOTE(review):
# "membderdescni" looks like a long-standing typo in the markup tables;
# preserved as-is since the markup it matches may spell it the same way.)
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    "cfuncdesc", "ctypedesc", "cvardesc",
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso", "itemize",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )

# Elements that may precede paragraph material without ending the scan.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem", "author",
    "stindex", "obindex", "COMMENT", "label", "xi:include", "title",
    "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
    "moduleauthor", "indexterm", "leader",
    )
def fixup_paras(doc, fragment):
    """Build <para> elements throughout the document fragment."""
    for child in fragment.childNodes:
        if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(fragment, "description"):
        fixup_paras_helper(doc, description)


def fixup_paras_helper(doc, container, depth=0):
    """Group paragraph material directly under *container* into <para>s."""
    # document is already normalized
    children = container.childNodes
    start = skip_leading_nodes(children)
    while len(children) > start:
        if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
            # Something to recurse into:
            fixup_paras_helper(doc, children[start])
        else:
            # Paragraph material:
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
        start = skip_leading_nodes(children, start + 1)


def build_para(doc, parent, start, i):
    """Collapse a run of parent.childNodes starting at *start* into <para>.

    Children are collected until a blank line (two consecutive newlines)
    appears in a text node or an element from BREAK_ELEMENTS is reached.
    Returns the index just past the inserted paragraph.
    """
    children = parent.childNodes
    after = start + 1
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
            pos = child.data.find("\n\n")
            if pos == 0:
                after = j
                break
            if pos >= 1:
                child.splitText(pos)
                break
    else:
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if data.rstrip() != data:
            have_last = 0
            child.splitText(len(data.rstrip()))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # walk backwards so removal does not disturb the indexes still to come
    for j in reversed(range(start, after)):
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
+ """ + i = len(children) + while i > start: + # skip over leading comments and whitespace: + child = children[start] + nodeType = child.nodeType + if nodeType == TEXT: + data = child.data + shortened = data.lstrip() + if shortened: + if data != shortened: + # break into two nodes: whitespace and non-whitespace + child.splitText(len(data) - len(shortened)) + return start + 1 + return start + # all whitespace, just skip + elif nodeType == ELEMENT: + tagName = child.tagName + if tagName in RECURSE_INTO_PARA_CONTAINERS: + return start + if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS: + return start + start = start + 1 + return start + + +def fixup_rfc_references(doc, fragment): + for rfcnode in find_all_elements_from_set(fragment, ("pep", "rfc")): + rfcnode.appendChild(doc.createTextNode( + rfcnode.tagName.upper() + " " + rfcnode.getAttribute("num"))) + + +def fixup_signatures(doc, fragment): + for child in fragment.childNodes: + if child.nodeType == ELEMENT: + args = child.getElementsByTagName("args") + for arg in args: + rewrite_args(doc, arg) + args = child.getElementsByTagName("constructor-args") + for arg in args: + rewrite_args(doc, arg) + +def rewrite_args(doc, arglist): + fixup_args(doc, arglist) + arglist.normalize() + if arglist.childNodes.length == 1 and arglist.firstChild.nodeType == TEXT: + node = arglist.firstChild + node.data = ' '.join(node.data.split()) + +def fixup_args(doc, arglist): + for child in arglist.childNodes: + if child.nodeName == "optional": + # found it; fix and return + arglist.insertBefore(doc.createTextNode("["), child) + optkids = child.childNodes + while optkids: + arglist.insertBefore(child.firstChild, child) + arglist.insertBefore(doc.createTextNode("]"), child) + arglist.removeChild(child) + return fixup_args(doc, arglist) + + +def fixup_sectionauthors(doc, fragment): + for sectauth in find_all_elements(fragment, "sectionauthor"): + section = sectauth.parentNode + section.removeChild(sectauth) + 
set_tagName(sectauth, "author") + sectauth.appendChild(doc.createTextNode( + sectauth.getAttribute("name"))) + sectauth.removeAttribute("name") + after = section.childNodes[2] + title = section.childNodes[1] + if title.nodeName != "title": + after = section.childNodes[0] + section.insertBefore(doc.createTextNode("\n "), after) + section.insertBefore(sectauth, after) + + +def fixup_verbatims(doc): + for verbatim in find_all_elements(doc, "verbatim"): + child = verbatim.childNodes[0] + if child.nodeType == TEXT \ + and child.data.lstrip().startswith(">>>"): + set_tagName(verbatim, "interactive-session") + + +def add_node_ids(fragment, counter=0): + fragment.node_id = counter + for node in fragment.childNodes: + counter = counter + 1 + if node.nodeType == ELEMENT: + counter = add_node_ids(node, counter) + else: + node.node_id = counter + return counter + 1 + + +def fixup_ulink(doc, fragment): + for ulink in find_all_elements(fragment, "ulink"): + children = ulink.childNodes + assert len(children) == 2 + text = children[0] + href = children[1] + href.normalize() + assert len(href.childNodes) == 1 + assert href.childNodes[0].nodeType == TEXT + url = href.childNodes[0].data + ulink.setAttribute("href", url) + ulink.removeChild(href) + content = text.childNodes + while len(content): + ulink.appendChild(content[0]) + ulink.removeChild(text) + + +REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex', + 'refexmodindex', 'refstmodindex') + +def fixup_refmodindexes(fragment): + # Locate <ref*modindex>...</> co-located with <module>...</>, and + # remove the <ref*modindex>, replacing it with index=index on the + # <module> element. 
+ nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS) + d = {} + for node in nodes: + parent = node.parentNode + d[parent.node_id] = parent + del nodes + map(fixup_refmodindexes_chunk, d.values()) + + +def fixup_refmodindexes_chunk(container): + # node is probably a <para>; let's see how often it isn't: + if container.tagName != PARA_ELEMENT: + bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container) + module_entries = find_all_elements(container, "module") + if not module_entries: + return + index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS) + removes = [] + for entry in index_entries: + children = entry.childNodes + if len(children) != 0: + bwrite("--- unexpected number of children for %s node:\n" + % entry.tagName) + ewrite(entry.toxml() + "\n") + continue + found = 0 + module_name = entry.getAttribute("module") + for node in module_entries: + if len(node.childNodes) != 1: + continue + this_name = node.childNodes[0].data + if this_name == module_name: + found = 1 + node.setAttribute("index", "yes") + if found: + removes.append(entry) + for node in removes: + container.removeChild(node) + + +def fixup_bifuncindexes(fragment): + nodes = find_all_elements(fragment, 'bifuncindex') + d = {} + # make sure that each parent is only processed once: + for node in nodes: + parent = node.parentNode + d[parent.node_id] = parent + del nodes + map(fixup_bifuncindexes_chunk, d.values()) + + +def fixup_bifuncindexes_chunk(container): + removes = [] + entries = find_all_child_elements(container, "bifuncindex") + function_entries = find_all_child_elements(container, "function") + for entry in entries: + function_name = entry.getAttribute("name") + found = 0 + for func_entry in function_entries: + t2 = func_entry.childNodes[0].data + if t2[-2:] != "()": + continue + t2 = t2[:-2] + if t2 == function_name: + func_entry.setAttribute("index", "yes") + func_entry.setAttribute("module", "__builtin__") + if not found: + found = 1 + 
removes.append(entry) + for entry in removes: + container.removeChild(entry) + + +def join_adjacent_elements(container, gi): + queue = [container] + while queue: + parent = queue.pop() + i = 0 + children = parent.childNodes + nchildren = len(children) + while i < (nchildren - 1): + child = children[i] + if child.nodeName == gi: + if children[i+1].nodeName == gi: + ewrite("--- merging two <%s/> elements\n" % gi) + child = children[i] + nextchild = children[i+1] + nextchildren = nextchild.childNodes + while len(nextchildren): + node = nextchildren[0] + nextchild.removeChild(node) + child.appendChild(node) + parent.removeChild(nextchild) + continue + if child.nodeType == ELEMENT: + queue.append(child) + i = i + 1 + + +_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") + +def write_esis(doc, ofp, knownempty): + for node in doc.childNodes: + nodeType = node.nodeType + if nodeType == ELEMENT: + gi = node.tagName + if knownempty(gi): + if node.hasChildNodes(): + raise ValueError, \ + "declared-empty node <%s> has children" % gi + ofp.write("e\n") + for k, value in node.attributes.items(): + if _token_rx.match(value): + dtype = "TOKEN" + else: + dtype = "CDATA" + ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value))) + ofp.write("(%s\n" % gi) + write_esis(node, ofp, knownempty) + ofp.write(")%s\n" % gi) + elif nodeType == TEXT: + ofp.write("-%s\n" % esistools.encode(node.data)) + elif nodeType == ENTITY_REFERENCE: + ofp.write("&%s\n" % node.nodeName) + else: + raise RuntimeError, "unsupported node type: %s" % nodeType + + +def convert(ifp, ofp): + events = esistools.parse(ifp) + toktype, doc = events.getEvent() + fragment = doc.createDocumentFragment() + events.expandNode(fragment) + + normalize(fragment) + simplify(doc, fragment) + handle_labels(doc, fragment) + handle_appendix(doc, fragment) + fixup_trailing_whitespace(doc, fragment, { + # element -> (before-end-tag, after-end-tag) + "abstract": ("\n", "\n"), + "title": ("", "\n"), + "chapter": ("\n", "\n\n\n"), 
+ "section": ("\n", "\n\n\n"), + "subsection": ("\n", "\n\n"), + "subsubsection": ("\n", "\n\n"), + "paragraph": ("\n", "\n\n"), + "subparagraph": ("\n", "\n\n"), + "description": ("\n", "\n\n"), + "enumeration": ("\n", "\n\n"), + "item": ("\n", "\n\n"), + }) + cleanup_root_text(doc) + cleanup_trailing_parens(fragment, ["function", "method", "cfunction"]) + cleanup_synopses(doc, fragment) + fixup_descriptors(doc, fragment) + fixup_verbatims(fragment) + normalize(fragment) + fixup_paras(doc, fragment) + fixup_sectionauthors(doc, fragment) + fixup_table_structures(doc, fragment) + fixup_rfc_references(doc, fragment) + fixup_signatures(doc, fragment) + fixup_ulink(doc, fragment) + add_node_ids(fragment) + fixup_refmodindexes(fragment) + fixup_bifuncindexes(fragment) + # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and + # LaTeX2HTML screwing with GNU-style long options (the '--' problem). + join_adjacent_elements(fragment, "option") + # Attempt to avoid trailing blank lines: + fragment.normalize() + if fragment.lastChild.data[-1:] == "\n": + fragment.lastChild.data = fragment.lastChild.data.rstrip() + "\n" + # + d = {} + for gi in events.parser.get_empties(): + d[gi] = gi + for key in ("author", "pep", "rfc"): + if d.has_key(key): + del d[key] + knownempty = d.has_key + # + try: + write_esis(fragment, ofp, knownempty) + except IOError, (err, msg): + # Ignore EPIPE; it just means that whoever we're writing to stopped + # reading. The rest of the output would be ignored. 
All other errors + # should still be reported, + if err != errno.EPIPE: + raise + + +def main(): + if len(sys.argv) == 1: + ifp = sys.stdin + ofp = sys.stdout + elif len(sys.argv) == 2: + ifp = open(sys.argv[1]) + ofp = sys.stdout + elif len(sys.argv) == 3: + ifp = open(sys.argv[1]) + import StringIO + ofp = StringIO.StringIO() + else: + usage() + sys.exit(2) + convert(ifp, ofp) + if len(sys.argv) == 3: + fp = open(sys.argv[2], "w") + fp.write(ofp.getvalue()) + fp.close() + ofp.close() + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/esis2sgml.py b/sys/src/cmd/python/Doc/tools/sgmlconv/esis2sgml.py new file mode 100755 index 000000000..b6f9a4475 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/esis2sgml.py @@ -0,0 +1,264 @@ +#! /usr/bin/env python + +"""Convert ESIS events to SGML or XML markup. + +This is limited, but seems sufficient for the ESIS generated by the +latex2esis.py script when run over the Python documentation. +""" + +# This should have an explicit option to indicate whether the *INPUT* was +# generated from an SGML or an XML application. 
+ +import errno +import os +import re +import string + +from xml.sax.saxutils import escape + +import esistools + + +AUTOCLOSE = () + +EMPTIES_FILENAME = "../sgml/empties.dat" +LIST_EMPTIES = 0 + + +_elem_map = {} +_attr_map = {} +_token_map = {} + +_normalize_case = str + +def map_gi(sgmlgi, map): + uncased = _normalize_case(sgmlgi) + try: + return map[uncased] + except IndexError: + map[uncased] = sgmlgi + return sgmlgi + +def null_map_gi(sgmlgi, map): + return sgmlgi + + +def format_attrs(attrs, xml=0): + attrs = attrs.items() + attrs.sort() + parts = [] + append = parts.append + for name, value in attrs: + if xml: + append('%s="%s"' % (name, escape(value))) + else: + # this is a little bogus, but should do for now + if name == value and isnmtoken(value): + append(value) + elif istoken(value): + if value == "no" + name: + append(value) + else: + append("%s=%s" % (name, value)) + else: + append('%s="%s"' % (name, escape(value))) + if parts: + parts.insert(0, '') + return " ".join(parts) + + +_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE) +def isnmtoken(s): + return _nmtoken_rx.match(s) is not None + +_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE) +def istoken(s): + return _token_rx.match(s) is not None + + +def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()): + if xml: + autoclose = () + attrs = {} + lastopened = None + knownempties = [] + knownempty = 0 + lastempty = 0 + inverbatim = 0 + while 1: + line = ifp.readline() + if not line: + break + + type = line[0] + data = line[1:] + if data and data[-1] == "\n": + data = data[:-1] + if type == "-": + data = esistools.decode(data) + data = escape(data) + if not inverbatim: + data = data.replace("---", "—") + ofp.write(data) + if "\n" in data: + lastopened = None + knownempty = 0 + lastempty = 0 + elif type == "(": + if data == "COMMENT": + ofp.write("<!--") + continue + data = map_gi(data, _elem_map) + if knownempty and xml: + ofp.write("<%s%s/>" % (data, format_attrs(attrs, 
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Translate the ESIS event stream on *ifp* to markup on *ofp*.

    *xml* selects XML output (empty-element tags, no minimization);
    *autoclose* lists SGML elements whose end tags are omitted;
    *verbatims* lists elements whose text is passed through untouched.
    """
    if xml:
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    inverbatim = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        type = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
        if type == "-":
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif type == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif type == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif type == "A":
            name, type, value = data.split(" ", 2)
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif type == "e":
            knownempty = 1
        elif type == "&":
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            # py3-compatible raise (was the py2 statement form)
            raise RuntimeError("unrecognized ESIS event type: '%s'" % type)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)


def dump_empty_element_names(knownempties):
    """Merge *knownempties* into the EMPTIES_FILENAME data file."""
    d = {}
    for gi in knownempties:
        d[gi] = gi
    # NOTE(review): this append mutates the caller's list after *d* has
    # already been built, so it has no effect on the output -- confirm
    # intent before removing.
    knownempties.append("")
    if os.path.isfile(EMPTIES_FILENAME):
        fp = open(EMPTIES_FILENAME)
        while 1:
            line = fp.readline()
            if not line:
                break
            gi = line.strip()
            if gi:
                d[gi] = gi
        # BUG FIX: close the read handle before rebinding fp for writing
        fp.close()
    fp = open(EMPTIES_FILENAME, "w")
    # sorted() instead of list.sort() on d.keys() (a view on py3)
    fp.write("\n".join(sorted(d.keys())))
    fp.write("\n")
    fp.close()


def update_gi_map(map, names, fromsgml=1):
    """Record canonical spellings for a comma-separated list of names.

    When *fromsgml* is true the lookup key is lowercased, matching the
    case-folding applied to names read from the ESIS stream.
    """
    for name in names.split(","):
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name


def main():
    """Command-line driver for the ESIS-to-markup conversion."""
    import getopt
    import sys
    #
    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = ''
    attr_names = ''
    value_names = ''
    verbatims = ('verbatim', 'interactive-session')
    opts, args = getopt.getopt(sys.argv[1:], "adesx",
                               ["autoclose=", "declare", "sgml", "xml",
                                "elements-map=", "attributes-map",
                                "values-map="])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            global LIST_EMPTIES
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names = ("%s,%s" % (elem_names, arg))[1:]
        elif opt == "--attributes-map":
            attr_names = ("%s,%s" % (attr_names, arg))[1:]
        elif opt == "--values-map":
            value_names = ("%s,%s" % (value_names, arg))[1:]
    #
    # open input streams:
    #
    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        # NOTE(review): usage() is not defined in the visible portion of
        # this file -- confirm it exists before relying on this branch.
        usage()
        sys.exit(2)
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        global _normalize_case
        _normalize_case = str.lower   # was string.lower (py2-only module fn)
        # BUG FIX: update_gi_map() splits on "," itself; passing the
        # result of .split(",") (a list) crashed with AttributeError.
        update_gi_map(_elem_map, elem_names)
        update_gi_map(_attr_map, attr_names)
        # BUG FIX: there is no _values_map in this module; the value
        # spelling cache is _token_map.
        update_gi_map(_token_map, value_names)
    else:
        global map_gi
        map_gi = null_map_gi
    #
    # run the conversion:
    #
    try:
        if xml and xmldecl:
            # BUG FIX: 'opf' was a typo for 'ofp' (NameError when -d given)
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
    except IOError as e:
        # py3-compatible except clause; ignore broken-pipe errors only
        if e.errno != errno.EPIPE:
            raise


if __name__ == "__main__":
    main()


"""Miscellaneous utility functions useful for dealing with ESIS streams."""

import re

import xml.dom.pulldom

import xml.sax
import xml.sax.handler
import xml.sax.xmlreader
re.compile(r"[^\\][^\\]*").match + +def decode(s): + r = '' + while s: + m = _data_match(s) + if m: + r = r + m.group() + s = s[m.end():] + elif s[1] == "\\": + r = r + "\\" + s = s[2:] + elif s[1] == "n": + r = r + "\n" + s = s[2:] + elif s[1] == "%": + s = s[2:] + n, s = s.split(";", 1) + r = r + unichr(int(n)) + else: + raise ValueError, "can't handle %r" % s + return r + + +_charmap = {} +for c in range(128): + _charmap[chr(c)] = chr(c) + _charmap[unichr(c + 128)] = chr(c + 128) +_charmap["\n"] = r"\n" +_charmap["\\"] = r"\\" +del c + +_null_join = ''.join +def encode(s): + try: + return _null_join(map(_charmap.get, s)) + except TypeError: + raise Exception("could not encode %r: %r" % (s, map(_charmap.get, s))) + + +class ESISReader(xml.sax.xmlreader.XMLReader): + """SAX Reader which reads from an ESIS stream. + + No verification of the document structure is performed by the + reader; a general verifier could be used as the target + ContentHandler instance. + + """ + _decl_handler = None + _lexical_handler = None + + _public_id = None + _system_id = None + + _buffer = "" + _is_empty = 0 + _lineno = 0 + _started = 0 + + def __init__(self, contentHandler=None, errorHandler=None): + xml.sax.xmlreader.XMLReader.__init__(self) + self._attrs = {} + self._attributes = Attributes(self._attrs) + self._locator = Locator() + self._empties = {} + if contentHandler: + self.setContentHandler(contentHandler) + if errorHandler: + self.setErrorHandler(errorHandler) + + def get_empties(self): + return self._empties.keys() + + # + # XMLReader interface + # + + def parse(self, source): + raise RuntimeError + self._locator._public_id = source.getPublicId() + self._locator._system_id = source.getSystemId() + fp = source.getByteStream() + handler = self.getContentHandler() + if handler: + handler.startDocument() + lineno = 0 + while 1: + token, data = self._get_token(fp) + if token is None: + break + lineno = lineno + 1 + self._locator._lineno = lineno + self._handle_token(token, 
data) + handler = self.getContentHandler() + if handler: + handler.startDocument() + + def feed(self, data): + if not self._started: + handler = self.getContentHandler() + if handler: + handler.startDocument() + self._started = 1 + data = self._buffer + data + self._buffer = None + lines = data.split("\n") + if lines: + for line in lines[:-1]: + self._lineno = self._lineno + 1 + self._locator._lineno = self._lineno + if not line: + e = xml.sax.SAXParseException( + "ESIS input line contains no token type mark", + None, self._locator) + self.getErrorHandler().error(e) + else: + self._handle_token(line[0], line[1:]) + self._buffer = lines[-1] + else: + self._buffer = "" + + def close(self): + handler = self.getContentHandler() + if handler: + handler.endDocument() + self._buffer = "" + + def _get_token(self, fp): + try: + line = fp.readline() + except IOError, e: + e = SAXException("I/O error reading input stream", e) + self.getErrorHandler().fatalError(e) + return + if not line: + return None, None + if line[-1] == "\n": + line = line[:-1] + if not line: + e = xml.sax.SAXParseException( + "ESIS input line contains no token type mark", + None, self._locator) + self.getErrorHandler().error(e) + return + return line[0], line[1:] + + def _handle_token(self, token, data): + handler = self.getContentHandler() + if token == '-': + if data and handler: + handler.characters(decode(data)) + elif token == ')': + if handler: + handler.endElement(decode(data)) + elif token == '(': + if self._is_empty: + self._empties[data] = 1 + self._is_empty = 0 + if handler: + handler.startElement(data, self._attributes) + self._attrs.clear() + elif token == 'A': + name, value = data.split(' ', 1) + if value != "IMPLIED": + type, value = value.split(' ', 1) + self._attrs[name] = (decode(value), type) + elif token == '&': + # entity reference in SAX? 
+ pass + elif token == '?': + if handler: + if ' ' in data: + target, data = data.split(None, 1) + else: + target, data = data, "" + handler.processingInstruction(target, decode(data)) + elif token == 'N': + handler = self.getDTDHandler() + if handler: + handler.notationDecl(data, self._public_id, self._system_id) + self._public_id = None + self._system_id = None + elif token == 'p': + self._public_id = decode(data) + elif token == 's': + self._system_id = decode(data) + elif token == 'e': + self._is_empty = 1 + elif token == 'C': + pass + else: + e = SAXParseException("unknown ESIS token in event stream", + None, self._locator) + self.getErrorHandler().error(e) + + def setContentHandler(self, handler): + old = self.getContentHandler() + if old: + old.setDocumentLocator(None) + if handler: + handler.setDocumentLocator(self._locator) + xml.sax.xmlreader.XMLReader.setContentHandler(self, handler) + + def getProperty(self, property): + if property == xml.sax.handler.property_lexical_handler: + return self._lexical_handler + + elif property == xml.sax.handler.property_declaration_handler: + return self._decl_handler + + else: + raise xml.sax.SAXNotRecognizedException("unknown property %r" + % (property, )) + + def setProperty(self, property, value): + if property == xml.sax.handler.property_lexical_handler: + if self._lexical_handler: + self._lexical_handler.setDocumentLocator(None) + if value: + value.setDocumentLocator(self._locator) + self._lexical_handler = value + + elif property == xml.sax.handler.property_declaration_handler: + if self._decl_handler: + self._decl_handler.setDocumentLocator(None) + if value: + value.setDocumentLocator(self._locator) + self._decl_handler = value + + else: + raise xml.sax.SAXNotRecognizedException() + + def getFeature(self, feature): + if feature == xml.sax.handler.feature_namespaces: + return 1 + else: + return xml.sax.xmlreader.XMLReader.getFeature(self, feature) + + def setFeature(self, feature, enabled): + if feature == 
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """SAX attributes object over a dict of the form {name: (value, type)}."""

    def getType(self, name):
        return self._attrs[name][1]

    def getValue(self, name):
        return self._attrs[name][0]

    def getValueByQName(self, name):
        # Qualified and plain names coincide for ESIS input.
        return self._attrs[name][0]

    def __getitem__(self, name):
        return self._attrs[name][0]

    def get(self, name, default=None):
        try:
            return self._attrs[name][0]
        except KeyError:
            return default

    def items(self):
        return [(name, value)
                for name, (value, type) in self._attrs.items()]

    def values(self):
        return [value for value, type in self._attrs.values()]


class Locator(xml.sax.xmlreader.Locator):
    """Trivial locator fed by ESISReader as it consumes input lines."""

    _lineno = -1
    _public_id = None
    _system_id = None

    def getLineNumber(self):
        return self._lineno

    def getPublicId(self):
        return self._public_id

    def getSystemId(self):
        return self._system_id


def parse(stream_or_string, parser=None):
    """Return a pulldom DOMEventStream over an ESIS stream.

    *stream_or_string* may be an open stream or a file name; a default
    ESISReader is created when no *parser* is supplied.
    """
    if type(stream_or_string) in (type(""), type(u"")):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    reader = parser or ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, reader, (2 ** 14) - 20)


# diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/latex2esis.py b/sys/src/cmd/python/Doc/tools/sgmlconv/latex2esis.py
# new file mode 100755
# index 000000000..643ef2ca3
# --- /dev/null
# +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/latex2esis.py

#! /usr/bin/env python

"""Generate ESIS events based on a LaTeX source document and
configuration data.

The conversion is not strong enough to work with arbitrary LaTeX
documents; it has only been designed to work with the highly stylized
markup used in the standard Python documentation.  A lot of
information about specific markup is encoded in the control table
passed to the convert() function; changing this table can allow this
tool to support additional LaTeX markups.

The format of the table is largely undocumented; see the commented
headers where the table is specified in main().  There is no provision
to load an alternate table from an external file.
"""
A lot of +information about specific markup is encoded in the control table +passed to the convert() function; changing this table can allow this +tool to support additional LaTeX markups. + +The format of the table is largely undocumented; see the commented +headers where the table is specified in main(). There is no provision +to load an alternate table from an external file. +""" + +import errno +import getopt +import os +import re +import sys +import xml.sax +import xml.sax.saxutils + +from esistools import encode + + +DEBUG = 0 + + +class LaTeXFormatError(Exception): + pass + + +class LaTeXStackError(LaTeXFormatError): + def __init__(self, found, stack): + msg = "environment close for %s doesn't match;\n stack = %s" \ + % (found, stack) + self.found = found + self.stack = stack[:] + LaTeXFormatError.__init__(self, msg) + + +_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") +_end_env_rx = re.compile(r"[\\]end{([^}]*)}") +_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)") +_comment_rx = re.compile("%+ ?(.*)\n[ \t]*") +_text_rx = re.compile(r"[^]~%\\{}]+") +_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE) +# _parameter_rx is this complicated to allow {...} inside a parameter; +# this is useful to match tabular layout specifications like {c|p{24pt}} +_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}") +_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") +_start_group_rx = re.compile("[ \n]*{") +_start_optional_rx = re.compile("[ \n]*[[]") + + +ESCAPED_CHARS = "$%#^ {}&~" + + +def dbgmsg(msg): + if DEBUG: + sys.stderr.write(msg + "\n") + +def pushing(name, point, depth): + dbgmsg("pushing <%s> at %s" % (name, point)) + +def popping(name, point, depth): + dbgmsg("popping </%s> at %s" % (name, point)) + + +class _Stack(list): + def append(self, entry): + if not isinstance(entry, str): + raise LaTeXFormatError("cannot push non-string on stack: %r" + % (entry, )) + #dbgmsg("%s<%s>" % (" "*len(self.data), entry)) + list.append(self, entry) 
+ + def pop(self, index=-1): + entry = self[index] + del self[index] + #dbgmsg("%s</%s>" % (" " * len(self), entry)) + + def __delitem__(self, index): + entry = self[index] + list.__delitem__(self, index) + #dbgmsg("%s</%s>" % (" " * len(self), entry)) + + +def new_stack(): + if DEBUG: + return _Stack() + else: + return [] + + +class Conversion: + def __init__(self, ifp, ofp, table): + self.write = ofp.write + self.ofp = ofp + self.table = table + L = [s.rstrip() for s in ifp.readlines()] + L.append("") + self.line = "\n".join(L) + self.preamble = 1 + + def convert(self): + self.subconvert() + + def subconvert(self, endchar=None, depth=0): + # + # Parses content, including sub-structures, until the character + # 'endchar' is found (with no open structures), or until the end + # of the input data is endchar is None. + # + stack = new_stack() + line = self.line + while line: + if line[0] == endchar and not stack: + self.line = line + return line + m = _comment_rx.match(line) + if m: + text = m.group(1) + if text: + self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" + % encode(text)) + line = line[m.end():] + continue + m = _begin_env_rx.match(line) + if m: + name = m.group(1) + entry = self.get_env_entry(name) + # re-write to use the macro handler + line = r"\%s %s" % (name, line[m.end():]) + continue + m = _end_env_rx.match(line) + if m: + # end of environment + envname = m.group(1) + entry = self.get_entry(envname) + while stack and envname != stack[-1] \ + and stack[-1] in entry.endcloses: + self.write(")%s\n" % stack.pop()) + if stack and envname == stack[-1]: + self.write(")%s\n" % entry.outputname) + del stack[-1] + else: + raise LaTeXStackError(envname, stack) + line = line[m.end():] + continue + m = _begin_macro_rx.match(line) + if m: + # start of macro + macroname = m.group(1) + if macroname == "c": + # Ugh! This is a combining character... 
                    endpos = m.end()
                    self.combining_char("c", line[endpos])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Copy raw text through to the matching
                    # \end{...} without further interpretation.
                    pos = line.find("\\end{%s}" % macroname)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len("\\end{%s}" % macroname):]
                    continue
                # Close any still-open elements this macro implicitly closes.
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty = self.start_macro(macroname)
                # rip off the macroname
                if params:
                    line = line[m.end(1):]
                elif empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %r"
                                    % (pentry.name, macroname, line[:100]))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # Recurse to convert the child's own "{...}" group.
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            if line[0] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %r%s"
                                   % (line[:100], extra))
        # Input exhausted: close elements that declare closers, then make
        # sure nothing is left dangling on the stack.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    # This is a really limited table of combinations, but it will have
    # to do for now.
    _combinations = {
        ("c", "c"): 0x00E7,
        ("'", "e"): 0x00E9,
        ('"', "o"): 0x00F6,
        }

    def combining_char(self, prefix, char):
        # Emit the combined character as an ESIS numeric reference.
        ordinal = self._combinations[(prefix, char)]
        self.write("-\\%%%d;\n" % ordinal)

    def start_macro(self, name):
        # Return (parameters, first-param-is-optional, empty-element flag).
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty

    def get_entry(self, name):
        entry = self.table.get(name)
        if entry is None:
            dbgmsg("get_entry(%r) failing; building default entry!"
                   % (name, ))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        # Emit an ESIS 'A' event; TOKEN when the value looks like a token.
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))


def convert(ifp, ofp, table):
    # Top-level driver; a broken pipe on output is not an error.
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError, (err, msg):
        if err != errno.EPIPE:
            raise


def skip_white(line):
    # Strip leading whitespace and LaTeX comment leaders.
    while line and line[0] in " %\n\t\r":
        line = line[1:].lstrip()
    return line



class TableEntry:
    # One macro/environment description from the conversion table.
    def __init__(self, name, environment=0):
        self.name = name
        self.outputname = name
        self.environment = environment
        self.empty = not environment
        self.has_content = 0
        self.verbatim = 0
        self.auto_close = 0
        self.parameters = []
        self.closes = []
        self.endcloses = []

class Parameter:
    # One parameter of a TableEntry (attribute, child, content, text, ...).
    def __init__(self, type, name=None, optional=0):
        self.type = type
        self.name = name
        self.optional = optional
        self.text = ''
        self.implied = 0


class TableHandler(xml.sax.handler.ContentHandler):
    # SAX handler that builds the conversion table from conversion.xml.
    def __init__(self):
        self.__table = {}
        self.__buffer = ''
        self.__methods = {}

    def get_table(self):
        # Environments always get implied content, even if unspecified.
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                p = Parameter("content")
                p.implied = 1
                entry.parameters.append(p)
                entry.has_content = 1
        return self.__table

    def startElement(self, tag, attrs):
        try:
            start, \
end = self.__methods[tag] + except KeyError: + start = getattr(self, "start_" + tag, None) + end = getattr(self, "end_" + tag, None) + self.__methods[tag] = (start, end) + if start: + start(attrs) + + def endElement(self, tag): + start, end = self.__methods[tag] + if end: + end() + + def endDocument(self): + self.__methods.clear() + + def characters(self, data): + self.__buffer += data + + def start_environment(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name, environment=1) + self.__current.verbatim = attrs.get("verbatim") == "yes" + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + self.__current.endcloses = attrs.get("endcloses", "").split() + def end_environment(self): + self.end_macro() + + def start_macro(self, attrs): + name = attrs["name"] + self.__current = TableEntry(name) + self.__current.closes = attrs.get("closes", "").split() + if attrs.has_key("outputname"): + self.__current.outputname = attrs.get("outputname") + def end_macro(self): + name = self.__current.name + if self.__table.has_key(name): + raise ValueError("name %r already in use" % (name,)) + self.__table[name] = self.__current + self.__current = None + + def start_attribute(self, attrs): + name = attrs.get("name") + optional = attrs.get("optional") == "yes" + if name: + p = Parameter("attribute", name, optional=optional) + else: + p = Parameter("attribute", optional=optional) + self.__current.parameters.append(p) + self.__buffer = '' + def end_attribute(self): + self.__current.parameters[-1].text = self.__buffer + + def start_entityref(self, attrs): + name = attrs["name"] + p = Parameter("entityref", name) + self.__current.parameters.append(p) + + def start_child(self, attrs): + name = attrs["name"] + p = Parameter("child", name, attrs.get("optional") == "yes") + self.__current.parameters.append(p) + self.__current.empty = 0 + + def start_content(self, attrs): + p = Parameter("content") + p.implied = attrs.get("implied") == "yes" + 
if self.__current.environment: + p.implied = 1 + self.__current.parameters.append(p) + self.__current.has_content = 1 + self.__current.empty = 0 + + def start_text(self, attrs): + self.__current.empty = 0 + self.__buffer = '' + def end_text(self): + p = Parameter("text") + p.text = self.__buffer + self.__current.parameters.append(p) + + +def load_table(fp): + ch = TableHandler() + xml.sax.parse(fp, ch) + return ch.get_table() + + +def main(): + global DEBUG + # + opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"]) + for opt, arg in opts: + if opt in ("-D", "--debug"): + DEBUG += 1 + if len(args) == 0: + ifp = sys.stdin + ofp = sys.stdout + elif len(args) == 1: + ifp = open(args[0]) + ofp = sys.stdout + elif len(args) == 2: + ifp = open(args[0]) + ofp = open(args[1], "w") + else: + usage() + sys.exit(2) + + table = load_table(open(os.path.join(sys.path[0], 'conversion.xml'))) + convert(ifp, ofp, table) + + +if __name__ == "__main__": + main() diff --git a/sys/src/cmd/python/Doc/tools/sgmlconv/make.rules b/sys/src/cmd/python/Doc/tools/sgmlconv/make.rules new file mode 100644 index 000000000..93579c535 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/sgmlconv/make.rules @@ -0,0 +1,48 @@ +# -*- makefile -*- +# +# Extra magic needed by the LaTeX->XML conversion process. This requires +# $(TOOLSDIR) to be properly defined. 
DOCFIXER=	$(TOOLSDIR)/sgmlconv/docfixer.py
ESIS2ML=	$(TOOLSDIR)/sgmlconv/esis2sgml.py
LATEX2ESIS=	$(TOOLSDIR)/sgmlconv/latex2esis.py
CONVERSION=	$(TOOLSDIR)/sgmlconv/conversion.xml

ESISTARGETS=	$(patsubst %.tex,%.esis,$(wildcard *.tex))
ESIS1TARGETS=	$(patsubst %.tex,%.esis1,$(wildcard *.tex))
XMLTARGETS=	$(patsubst %.tex,%.xml,$(wildcard *.tex))

L2EFLAGS=

all:	xml

esis:	$(ESISTARGETS)
esis1:	$(ESIS1TARGETS)
xml:	$(XMLTARGETS)

ESISTOOLS=	$(TOOLSDIR)/sgmlconv/esistools.py

$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS) $(CONVERSION)
$(ESIS1TARGETS): $(LATEX2ESIS) $(CONVERSION)
# This variant is easier to work with while debugging the conversion spec:
#$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS)
$(XMLTARGETS): $(ESIS2ML)


.SUFFIXES: .esis .esis1 .tex .xml

.tex.esis1:
	$(LATEX2ESIS) $(L2EFLAGS) $< $@

.esis1.esis:
	$(DOCFIXER) $< $@

.esis.xml:
	$(ESIS2ML) --xml $< $@


clean:
	rm -f *.esis *.esis1

clobber: clean
	rm -f *.xml

# diff --git a/sys/src/cmd/python/Doc/tools/support.py b/sys/src/cmd/python/Doc/tools/support.py
# new file mode 100644
# index 000000000..5b8471aac
# --- /dev/null
# +++ b/sys/src/cmd/python/Doc/tools/support.py

"""Miscellaneous support code shared by some of the tool scripts.

This includes option parsing code, HTML formatting code, and a couple of
useful helpers.

"""
__version__ = '$Revision: 37764 $'


import getopt
import os.path
import sys


class Options:
    # Shared command-line option handling for the doc-tool scripts.
    __short_args = "a:c:ho:"
    __long_args = [
        # script controls
        "columns=", "help", "output=",

        # content components
        "address=", "iconserver=", "favicon=",
        "title=", "uplink=", "uptitle=",
        "image-type=",
        ]

    outputfile = "-"
    columns = 1
    letters = 0
    uplink = "index.html"
    uptitle = "Python Documentation Index"
    favicon = None

    # The "Aesop Meta Tag" is poorly described, and may only be used
    # by the Aesop search engine (www.aesop.com), but doesn't hurt.
    #
    # There are a number of values this may take to roughly categorize
    # a page.  A page should be marked according to its primary
    # category.  Known values are:
    #   'personal'    -- personal-info
    #   'information' -- information
    #   'interactive' -- interactive media
    #   'multimedia'  -- multimedia presentation (non-sales)
    #   'sales'       -- sales material
    #   'links'       -- links to other information pages
    #
    # Setting the aesop_type value to one of these strings will cause
    # get_header() to add the appropriate <meta> tag to the <head>.
    #
    aesop_type = None

    def __init__(self):
        self.args = []
        self.variables = {"address": "",
                          "iconserver": "icons",
                          "imgtype": "png",
                          "title": "Global Module Index",
                          }

    def add_args(self, short=None, long=None):
        # Let subclass scripts extend the recognized option set.
        if short:
            self.__short_args = self.__short_args + short
        if long:
            self.__long_args = self.__long_args + long

    def parse(self, args):
        try:
            opts, args = getopt.getopt(args, self.__short_args,
                                       self.__long_args)
        except getopt.error:
            sys.stdout = sys.stderr
            self.usage()
            sys.exit(2)
        self.args = self.args + args
        for opt, val in opts:
            if opt in ("-a", "--address"):
                val = val.strip()
                if val:
                    val = "<address>\n%s\n</address>\n" % val
                self.variables["address"] = val
            elif opt in ("-h", "--help"):
                self.usage()
                sys.exit()
            elif opt in ("-o", "--output"):
                self.outputfile = val
            elif opt in ("-c", "--columns"):
                self.columns = int(val)
            elif opt == "--title":
                self.variables["title"] = val.strip()
            elif opt == "--uplink":
                self.uplink = val.strip()
            elif opt == "--uptitle":
                self.uptitle = val.strip()
            elif opt == "--iconserver":
                self.variables["iconserver"] = val.strip() or "."
            elif opt == "--favicon":
                self.favicon = val.strip()
            elif opt == "--image-type":
                self.variables["imgtype"] = val.strip()
            else:
                # Unknown options are delegated to subclass hooks.
                self.handle_option(opt, val)
        if self.uplink and self.uptitle:
            self.variables["uplinkalt"] = "up"
            self.variables["uplinkicon"] = "up"
        else:
            self.variables["uplinkalt"] = ""
            self.variables["uplinkicon"] = "blank"
        self.variables["uplink"] = self.uplink
        self.variables["uptitle"] = self.uptitle

    def handle_option(self, opt, val):
        # Subclasses override this to support extra options.
        raise getopt.error("option %s not recognized" % opt)

    def get_header(self):
        # Build the HTML <head> plus navigation, inserting optional
        # up-link, aesop <meta>, and favicon <link> elements.
        s = HEAD % self.variables
        if self.uplink:
            if self.uptitle:
                link = ('<link rel="up" href="%s" title="%s">\n  '
                        '<link rel="start" href="%s" title="%s">'
                        % (self.uplink, self.uptitle,
                           self.uplink, self.uptitle))
            else:
                link = ('<link rel="up" href="%s">\n  '
                        '<link rel="start" href="%s">'
                        % (self.uplink, self.uplink))
            repl = "  %s\n</head>" % link
            s = s.replace("</head>", repl, 1)
        if self.aesop_type:
            meta = '<meta name="aesop" content="%s">\n  ' % self.aesop_type
            # Insert this in the middle of the head that's been
            # generated so far, keeping <meta> and <link> elements in
            # neat groups:
            s = s.replace("<link ", meta + "<link ", 1)
        if self.favicon:
            ext = os.path.splitext(self.favicon)[1]
            if ext in (".gif", ".png"):
                type = ' type="image/%s"' % ext[1:]
            else:
                type = ''
            link = ('<link rel="SHORTCUT ICON" href="%s"%s>\n  '
                    % (self.favicon, type))
            s = s.replace("<link ", link + "<link ", 1)
        return s

    def get_footer(self):
        return TAIL % self.variables

    def get_output_file(self, filename=None):
        # "-" (the default) means standard output.
        if filename is None:
            filename = self.outputfile
        if filename == "-":
            return sys.stdout
        else:
            return open(filename, "w")


NAVIGATION = '''\
<div class="navigation">
<table width="100%%" cellpadding="0" cellspacing="2">
<tr>
<td><img width="32" height="32" align="bottom" border="0" alt=""
 src="%(iconserver)s/blank.%(imgtype)s"></td>
<td><a href="%(uplink)s"
 title="%(uptitle)s"><img width="32" height="32" align="bottom" border="0"
 alt="%(uplinkalt)s"
 src="%(iconserver)s/%(uplinkicon)s.%(imgtype)s"></a></td>
<td><img width="32" height="32" align="bottom" border="0" alt=""
 src="%(iconserver)s/blank.%(imgtype)s"></td>
<td align="center" width="100%%">%(title)s</td>
<td><img width="32" height="32" align="bottom" border="0" alt=""
 src="%(iconserver)s/blank.%(imgtype)s"></td>
<td><img width="32" height="32" align="bottom" border="0" alt=""
 src="%(iconserver)s/blank.%(imgtype)s"></td>
<td><img width="32" height="32" align="bottom" border="0" alt=""
 src="%(iconserver)s/blank.%(imgtype)s"></td>
</tr></table>
<b class="navlabel">Up:</b> <span class="sectref"><a href="%(uplink)s"
 title="%(uptitle)s">%(uptitle)s</A></span>
<br></div>
'''

HEAD = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
  <title>%(title)s</title>
  <meta name="description" content="%(title)s">
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  <link rel="STYLESHEET" href="lib/lib.css">
</head>
<body>
''' + NAVIGATION + '''\
<hr>

<h2>%(title)s</h2>

'''

TAIL = "<hr>\n" + NAVIGATION + '''\
%(address)s</body>
</html>
'''

# diff --git a/sys/src/cmd/python/Doc/tools/toc2bkm.py b/sys/src/cmd/python/Doc/tools/toc2bkm.py
# new file mode 100755
# index 000000000..ab669ba95
# --- /dev/null
# +++ b/sys/src/cmd/python/Doc/tools/toc2bkm.py

#! /usr/bin/env python

"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.

The output file has an extension of '.bkm' instead of '.out', since hyperref
already uses that extension.
"""

import getopt
import os
import re
import string
import sys


# Each item in an entry is a tuple of:
#
#   Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is such a tuple.
cline_re = r"""^
\\contentsline\ \{([a-z]*)}          # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)}                                # title string
\{(\d+)}$"""                         # page number

cline_rx = re.compile(cline_re, re.VERBOSE)

OUTER_TO_INNER = -1

# Legal sectioning-level transitions; positive values give the number of
# levels to pop back out, OUTER_TO_INNER descends one level.
_transition_map = {
    ('chapter', 'section'): OUTER_TO_INNER,
    ('section', 'subsection'): OUTER_TO_INNER,
    ('subsection', 'subsubsection'): OUTER_TO_INNER,
    ('subsubsection', 'subsection'): 1,
    ('subsection', 'section'): 1,
    ('section', 'chapter'): 1,
    ('subsection', 'chapter'): 2,
    ('subsubsection', 'section'): 2,
    ('subsubsection', 'chapter'): 3,
    }

INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")


class BadSectionNesting(Exception):
    """Raised for unsupported section level transitions."""

    def __init__(self, level, newsection, path, lineno):
        self.level = level
        self.newsection = newsection
        self.path = path
        self.lineno = lineno

    def __str__(self):
        return ("illegal transition from %s to %s at %s (line %s)"
                % (self.level, self.newsection, self.path, self.lineno))


def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc stream into a nested list of entries.

    Each entry is (type, number, title, pageno, sub-entries).  *bigpart*
    optionally names the outermost sectioning level ('chapter' by default).
    Raises BadSectionNesting for level transitions not in _transition_map.
    """
    toc = top = []
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    while 1:
        line = fp.readline()
        if not line:
            break
        lineno = lineno + 1
        m = cline_rx.match(line)
        if m:
            stype, snum, title, pageno = m.group(1, 2, 3, 4)
            title = clean_title(title)
            entry = (stype, snum, title, int(pageno), [])
            if stype == level:
                toc.append(entry)
            else:
                if stype not in INCLUDED_LEVELS:
                    # we don't want paragraphs & subparagraphs
                    continue
                try:
                    direction = _transition_map[(level, stype)]
                except KeyError:
                    raise BadSectionNesting(level, stype, fp.name, lineno)
                if direction == OUTER_TO_INNER:
                    toc = toc[-1][-1]
                    stack.insert(0, toc)
                    toc.append(entry)
                else:
                    for i in range(direction):
                        del stack[0]
                    toc = stack[0]
                    toc.append(entry)
                level = stype
        else:
            # Bug fix: the original wrote ``"l.%s: " + line``, leaving a
            # literal "%s" in the output; interpolate the line number.
            sys.stderr.write("l.%s: %s" % (lineno, line))
    return top


hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")

def clean_title(title):
    """Strip LaTeX markup from a table-of-contents title string."""
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while 1:
        m = title_rx.search(title, pos)
        if m:
            start = m.start()
            if title[start:start+15] != "\\textunderscore":
                # Drop the LaTeX command (but keep \textunderscore).
                title = title[:start] + title[m.end():]
            pos = start + 1
        else:
            break
    # Delete grouping braces.  Equivalent to the original Python-2-only
    # two-argument ``title.translate(title_trans, "{}")`` call, without
    # needing a ``string.maketrans`` table.
    title = title.replace("{", "").replace("}", "")
    return title


def write_toc(toc, fp):
    for entry in toc:
        write_toc_entry(entry, fp, 0)

def write_toc_entry(entry, fp, layer):
    """Write one \\pdfoutline line for *entry*, then recurse for children."""
    stype, snum, title, pageno, toc = entry
    s = "\\pdfoutline goto name{page%03d}" % pageno
    if toc:
        s = "%s count -%d" % (s, len(toc))
    if snum:
        title = "%s %s" % (snum, title)
    s = "%s {%s}\n" % (s, title)
    fp.write(s)
    for entry in toc:
        write_toc_entry(entry, fp, layer + 1)


def process(ifn, ofn, bigpart=None):
    """Convert .toc file *ifn* into PDFTeX bookmark file *ofn*."""
    toc = parse_toc(open(ifn), bigpart)
    write_toc(toc, open(ofn, "w"))


def main():
    bigpart = None
    opts, args = getopt.getopt(sys.argv[1:], "c:")
    if opts:
        bigpart = opts[0][1]
    if not args:
        # Bug fix: the original called an undefined ``usage()``
        # (NameError); print a usage message directly instead.
        sys.stderr.write("usage: %s [-c level] file...\n" % sys.argv[0])
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()


# diff --git a/sys/src/cmd/python/Doc/tools/undoc_symbols.py b/sys/src/cmd/python/Doc/tools/undoc_symbols.py
# new file mode 100644
# index 000000000..3d776fa45
# --- /dev/null
# +++ b/sys/src/cmd/python/Doc/tools/undoc_symbols.py

#! /usr/bin/env python
+ +Finally all symbols not found in the docs are written to standard +output, prefixed with their tag kind. +""" + +# Which kind of tags do we need? +TAG_KINDS = "dpst" + +# Doc sections to use +DOCSECTIONS = ["api"]# ["api", "ext"] + +# Only print symbols starting with this prefix, +# to get all symbols, use an empty string +PREFIXES = ("Py", "PY") + +INCLUDEPATTERN = "*.h" + +# end of customization section + + +# Tested with EXUBERANT CTAGS +# see http://ctags.sourceforge.net +# +# ctags fields are separated by tabs. +# The first field is the name, the last field the type: +# d macro definitions (and #undef names) +# e enumerators +# f function definitions +# g enumeration names +# m class, struct, or union members +# n namespaces +# p function prototypes and declarations +# s structure names +# t typedefs +# u union names +# v variable definitions +# x extern and forward variable declarations + +import os, glob, re, sys + +def findnames(file, prefixes=()): + names = {} + for line in file.xreadlines(): + if line[0] == '!': + continue + fields = line.split() + name, tag = fields[0], fields[-1] + if tag == 'd' and name.endswith('_H'): + continue + if prefixes: + sw = name.startswith + for prefix in prefixes: + if sw(prefix): + names[name] = tag + else: + names[name] = tag + return names + +def print_undoc_symbols(prefix, docdir, incdir): + docs = [] + + for sect in DOCSECTIONS: + for file in glob.glob(os.path.join(docdir, sect, "*.tex")): + docs.append(open(file).read()) + + docs = "\n".join(docs) + + incfiles = os.path.join(incdir, INCLUDEPATTERN) + + fp = os.popen("ctags -IPyAPI_FUNC -IPy_GCC_ATTRIBUTE --c-types=%s -f - %s" + % (TAG_KINDS, incfiles)) + dict = findnames(fp, prefix) + names = dict.keys() + names.sort() + for name in names: + if not re.search("%s\\W" % name, docs): + print dict[name], name + +if __name__ == '__main__': + srcdir = os.path.dirname(sys.argv[0]) + incdir = os.path.normpath(os.path.join(srcdir, "../../Include")) + docdir = 
os.path.normpath(os.path.join(srcdir, "..")) + + print_undoc_symbols(PREFIXES, docdir, incdir) diff --git a/sys/src/cmd/python/Doc/tools/update-docs.sh b/sys/src/cmd/python/Doc/tools/update-docs.sh new file mode 100755 index 000000000..6599c64d1 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/update-docs.sh @@ -0,0 +1,31 @@ +#! /bin/sh + +# Script which installs a development snapshot of the documentation +# into the development website. +# +# The push-docs.sh script pushes this to the server when needed +# and removes it when done. + +if [ -z "$HOME" ] ; then + HOME=`grep "$LOGNAME" /etc/passwd | sed 's|^.*:\([^:]*\):[^:]*$|\1|'` + export HOME +fi + +DOCTYPE="$1" +UPDATES="$HOME/tmp/$2" + +TMPDIR="$$-docs" + +cd /ftp/ftp.python.org/pub/www.python.org/dev/doc/ || exit $? +mkdir $TMPDIR || exit $? +cd $TMPDIR || exit $? +(bzip2 -dc "$UPDATES" | tar xf -) || exit $? +cd .. || exit $? + +if [ -d $DOCTYPE ] ; then + mv $DOCTYPE $DOCTYPE-temp +fi +mv $TMPDIR/Python-Docs-* $DOCTYPE +rmdir $TMPDIR +rm -rf $DOCTYPE-temp || exit $? +mv "$UPDATES" python-docs-$DOCTYPE.tar.bz2 || exit $? diff --git a/sys/src/cmd/python/Doc/tools/whichlibs b/sys/src/cmd/python/Doc/tools/whichlibs new file mode 100755 index 000000000..10d44ee71 --- /dev/null +++ b/sys/src/cmd/python/Doc/tools/whichlibs @@ -0,0 +1,2 @@ +#!/bin/sh +sed -n 's%^\\input{\(lib[a-zA-Z0-9_]*\)}.*%../lib/\1.tex%p' ../lib/lib.tex |