summaryrefslogtreecommitdiff
path: root/sys/src/cmd/python/Doc/tools/toc2bkm.py
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
committercinap_lenrek <cinap_lenrek@localhost>2011-05-03 11:25:13 +0000
commit458120dd40db6b4df55a4e96b650e16798ef06a0 (patch)
tree8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Doc/tools/toc2bkm.py
parent3a742c699f6806c1145aea5149bf15de15a0afd7 (diff)
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Doc/tools/toc2bkm.py')
-rwxr-xr-xsys/src/cmd/python/Doc/tools/toc2bkm.py160
1 files changed, 160 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Doc/tools/toc2bkm.py b/sys/src/cmd/python/Doc/tools/toc2bkm.py
new file mode 100755
index 000000000..ab669ba95
--- /dev/null
+++ b/sys/src/cmd/python/Doc/tools/toc2bkm.py
@@ -0,0 +1,160 @@
+#! /usr/bin/env python
+
+"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
+
+The output file has an extension of '.bkm' instead of '.out', since hyperref
+already uses that extension.
+"""
+
+import getopt
+import os
+import re
+import string
+import sys
+
+
+# Each item in an entry is a tuple of:
+#
+# Section Type, Section #, Title String, Page #, List of Sub-entries
+#
+# The return value of parse_toc() is a list of such tuples.
+
# Pattern for a single \contentsline record of a .toc file.  It is compiled
# in verbose mode, so literal blanks are written as "\ " and everything after
# a "#" inside the pattern is a comment.  The four groups are the section
# type, the optional section number, the raw title and the page number.
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number

cline_rx = re.compile(cline_re, flags=re.VERBOSE)
+
+OUTER_TO_INNER = -1
+
+_transition_map = {
+ ('chapter', 'section'): OUTER_TO_INNER,
+ ('section', 'subsection'): OUTER_TO_INNER,
+ ('subsection', 'subsubsection'): OUTER_TO_INNER,
+ ('subsubsection', 'subsection'): 1,
+ ('subsection', 'section'): 1,
+ ('section', 'chapter'): 1,
+ ('subsection', 'chapter'): 2,
+ ('subsubsection', 'section'): 2,
+ ('subsubsection', 'chapter'): 3,
+ }
+
+INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
+
+
class BadSectionNesting(Exception):
    """Raised for unsupported section level transitions.

    Attributes:
        level: section level that was current before the offending entry.
        newsection: section level of the offending entry.
        path: name of the file being parsed.
        lineno: line number of the offending entry.
    """

    def __init__(self, level, newsection, path, lineno):
        # Pass the values on to Exception too, so that args, repr() and
        # pickling carry them instead of describing an empty exception.
        Exception.__init__(self, level, newsection, path, lineno)
        self.level = level
        self.newsection = newsection
        self.path = path
        self.lineno = lineno

    def __str__(self):
        return ("illegal transition from %s to %s at %s (line %s)"
                % (self.level, self.newsection, self.path, self.lineno))
+
+
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc file into a nested table of contents.

    Arguments:
        fp: open file object for the .toc file; it is consumed with
            readline().  Its name attribute, when it has one, is used in
            error reports.
        bigpart: name of the outermost section level to expect; defaults
            to 'chapter'.

    Returns a list of top-level entries.  Each entry is a tuple of
    (section type, section number or None, cleaned title, page number,
    list of sub-entries).

    Raises BadSectionNesting when the section level changes in a way that
    _transition_map does not list, when a nested entry shows up before any
    entry that could contain it, or when an entry is shallower than the
    outermost level.

    Lines that are not contentsline records are echoed to sys.stderr with
    their line number and otherwise ignored.  Records whose level is not in
    INCLUDED_LEVELS (and differs from the current level) are skipped.
    """
    toc = top = []
    # stack[0] is the list currently being filled; the lists that enclose
    # it follow, ending with the top-level list.
    stack = [toc]
    level = bigpart or 'chapter'
    # File-like objects such as StringIO have no name attribute.
    path = getattr(fp, "name", "<unknown>")
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno = lineno + 1
        m = cline_rx.match(line)
        if not m:
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        direction = _transition_map.get((level, stype))
        if direction is None:
            raise BadSectionNesting(level, stype, path, lineno)
        if direction == OUTER_TO_INNER:
            if not toc:
                # Nothing has been read yet that could contain this entry.
                raise BadSectionNesting(level, stype, path, lineno)
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            if direction >= len(stack):
                # The entry is shallower than the outermost level.
                raise BadSectionNesting(level, stype, path, lineno)
            # Close `direction` nesting levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
+
+
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
# clean_title() no longer needs this table; the name is only kept so that
# existing references to it keep working.  string.maketrans does not exist
# on Python 3, hence the fallback.
try:
    title_trans = string.maketrans("", "")
except AttributeError:
    title_trans = str.maketrans("", "")


def clean_title(title):
    r"""Return *title* with its LaTeX markup reduced to plain text.

    The steps, in order:

    * each \raisebox is removed together with its first brace group;
    * each \hackscore{...} is replaced by \_;
    * each control word followed by whitespace is deleted, except where
      the text at that point starts with \textunderscore;
    * all remaining braces are removed.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while True:
        m = title_rx.search(title, pos)
        if not m:
            break
        start = m.start()
        if title.startswith("\\textunderscore", start):
            # Keep this macro and carry on scanning after it.
            pos = m.end()
        else:
            # Delete the macro and rescan from the same position: whatever
            # followed it, possibly another macro, now begins at `start`.
            title = title[:start] + title[m.end():]
            pos = start
    # str.replace works for byte and unicode strings alike.
    return title.replace("{", "").replace("}", "")
+
+
def write_toc(toc, fp):
    """Write every entry of *toc*, with its sub-entries, to *fp*.

    The entries in *toc* itself are written at layer 0.
    """
    top_layer = 0
    for top_entry in toc:
        write_toc_entry(top_entry, fp, top_layer)
+
def write_toc_entry(entry, fp, layer):
    """Write the \\pdfoutline line for *entry*, then those of its sub-entries.

    entry is a (section type, section number, title, page number,
    sub-entries) tuple.  The line names the destination page, carries a
    negative count of the direct sub-entries when there are any, and ends
    with the title, prefixed by the section number when there is one.
    layer is the nesting depth; it is only handed on, increased by one, to
    the recursive calls.
    """
    stype, snum, title, pageno, children = entry
    outline = "\\pdfoutline goto name{page%03d}" % pageno
    if children:
        outline = "%s count -%d" % (outline, len(children))
    label = "%s %s" % (snum, title) if snum else title
    fp.write("%s {%s}\n" % (outline, label))
    deeper = layer + 1
    for child in children:
        write_toc_entry(child, fp, deeper)
+
+
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file named *ifn* into the outline file *ofn*.

    Arguments:
        ifn: path of the .toc file to read.
        ofn: path of the file to write; it is created or overwritten.
        bigpart: outermost section level, passed through to parse_toc().

    The input is parsed completely before the output file is opened, and
    both files are closed even when parsing or writing fails.
    """
    ifp = open(ifn)
    try:
        toc = parse_toc(ifp, bigpart)
    finally:
        ifp.close()
    ofp = open(ofn, "w")
    try:
        write_toc(toc, ofp)
    finally:
        ofp.close()
+
+
def usage():
    """Write a one-line usage summary to standard error."""
    program = os.path.basename(sys.argv[0]) or "toc2bkm.py"
    sys.stderr.write("usage: %s [-c bigpart] file ...\n" % program)


def main():
    """Command-line entry point.

    Each file named on the command line is converted by process().  A name
    without an extension gets '.toc' appended, and the output goes to the
    same base name with a '.bkm' extension.  The -c option gives the
    outermost section level; when it is repeated, the first value is used.

    Exits with status 2 after printing the usage summary when an option is
    invalid or no file is named.
    """
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError:
        # sys.exc_info() keeps this working on interpreters that predate
        # the "except ... as" syntax.
        sys.stderr.write("%s\n" % (sys.exc_info()[1],))
        usage()
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        process(base + (ext or ".toc"), base + ".bkm", bigpart)


if __name__ == "__main__":
    main()