diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/tools/toc2bkm.py | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/tools/toc2bkm.py')
-rwxr-xr-x | sys/src/cmd/python/Doc/tools/toc2bkm.py | 160 |
1 files changed, 160 insertions, 0 deletions
#! /usr/bin/env python

"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.

The output file has an extension of '.bkm' instead of '.out', since hyperref
already uses that extension.
"""

import getopt
import os
import re
import string  # retained from the original module; no longer used below
import sys


# Each entry in a table of contents is a tuple of:
#
#   Section type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples; the sub-entry
# list of each tuple has the same shape, recursively.

cline_re = r"""^
\\contentsline\ \{([a-z]*)}             # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})?    # section number
(.*)}                                   # title string
\{(\d+)}$"""                            # page number

cline_rx = re.compile(cline_re, re.VERBOSE)

# Marker used in _transition_map for "descend one level"; positive values
# in the map are the number of levels to climb back up instead.
OUTER_TO_INNER = -1

_transition_map = {
    ('chapter', 'section'): OUTER_TO_INNER,
    ('section', 'subsection'): OUTER_TO_INNER,
    ('subsection', 'subsubsection'): OUTER_TO_INNER,
    ('subsubsection', 'subsection'): 1,
    ('subsection', 'section'): 1,
    ('section', 'chapter'): 1,
    ('subsection', 'chapter'): 2,
    ('subsubsection', 'section'): 2,
    ('subsubsection', 'chapter'): 3,
    }

INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")


class BadSectionNesting(Exception):
    """Raised for unsupported section level transitions."""

    def __init__(self, level, newsection, path, lineno):
        Exception.__init__(self, level, newsection, path, lineno)
        self.level = level
        self.newsection = newsection
        self.path = path
        self.lineno = lineno

    def __str__(self):
        return ("illegal transition from %s to %s at %s (line %s)"
                % (self.level, self.newsection, self.path, self.lineno))


def parse_toc(fp, bigpart=None):
    """Parse the .toc data readable from *fp* into a nested entry list.

    *fp* is read line by line with readline().  *bigpart* names the section
    type expected at the outermost level; it defaults to 'chapter'.

    Returns a list of (type, number, title, page, sub-entries) tuples.
    Lines that do not look like \\contentsline entries are echoed to
    standard error, prefixed with their line number, and otherwise ignored.

    Raises BadSectionNesting when the section type changes in a way that
    _transition_map does not describe, or when a nested section appears
    before any entry it could be attached to.
    """
    toc = top = []
    # stack[0] is always the list currently being appended to; the lists
    # of the enclosing levels follow it.
    stack = [toc]
    level = bigpart or 'chapter'
    path = getattr(fp, "name", "<input>")
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno = lineno + 1
        m = cline_rx.match(line)
        if not m:
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        try:
            direction = _transition_map[(level, stype)]
        except KeyError:
            raise BadSectionNesting(level, stype, path, lineno)
        if direction == OUTER_TO_INNER:
            if not toc:
                # Nothing at the current level to hang the new entry on.
                raise BadSectionNesting(level, stype, path, lineno)
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back up `direction` levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top


hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")


def clean_title(title):
    """Return *title* with LaTeX markup reduced to plain outline text.

    \\raisebox{...} groups are dropped, \\hackscore{...} becomes \\_, every
    other macro that is followed by whitespace is removed (except
    \\textunderscore, which is kept), and finally all braces are deleted.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while True:
        m = title_rx.search(title, pos)
        if not m:
            break
        start = m.start()
        if title[start:start+15] != "\\textunderscore":
            title = title[:start] + title[m.end():]
        # Resume just past the backslash so a kept macro is not re-matched.
        pos = start + 1
    # str.replace works on both Python 2 and 3, unlike the two-argument
    # form of str.translate().
    return title.replace("{", "").replace("}", "")


def write_toc(toc, fp):
    """Write \\pdfoutline commands for every entry of *toc* to *fp*."""
    for entry in toc:
        write_toc_entry(entry, fp, 0)


def write_toc_entry(entry, fp, layer):
    """Write one outline line for *entry*, then recurse into its children.

    *layer* is the nesting depth of *entry*; it is only passed along to the
    recursive calls and does not affect the output.
    """
    stype, snum, title, pageno, toc = entry
    s = "\\pdfoutline goto name{page%03d}" % pageno
    if toc:
        # The count is written negated, as in "count -2".
        s = "%s count -%d" % (s, len(toc))
    if snum:
        title = "%s %s" % (snum, title)
    fp.write("%s {%s}\n" % (s, title))
    for child in toc:
        write_toc_entry(child, fp, layer + 1)


def process(ifn, ofn, bigpart=None):
    """Read the .toc file *ifn* and write the bookmark file *ofn*.

    *bigpart* is passed through to parse_toc().  Both files are closed
    before returning, including when parsing fails.
    """
    with open(ifn) as ifp:
        toc = parse_toc(ifp, bigpart)
    with open(ofn, "w") as ofp:
        write_toc(toc, ofp)


def usage():
    """Write a one-line usage summary to standard error."""
    prog = os.path.basename(sys.argv[0]) or "toc2bkm.py"
    sys.stderr.write("usage: %s [-c bigpart] file[.toc] ...\n" % prog)


def main():
    """Command-line entry point.

    The -c option supplies the outermost section type handed to
    parse_toc().  Each remaining argument names a .toc file (the extension
    may be omitted); the result is written next to it with a .bkm
    extension.  Exits with status 2 on a usage error.
    """
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        usage()
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()