author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000
---|---|---
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee | /sys/src/cmd/python/Parser
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Parser')
25 files changed, 6940 insertions, 0 deletions
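The first file below, Python.asdl, defines the Python AST in the Zephyr ASDL notation, and asdl_c.py (also in this diff) generates C declarations from it. As an orientation for reading both files, here is a sketch of the kind of C that asdl_c.py's TypeDefVisitor and StructVisitor would emit for a simple sum such as `boolop = And | Or` and for the first constructors of `expr`; it is reconstructed from the emit strings in those visitors, abbreviated, and not copied from real generated output:

```c
/* Sketch of asdl_c.py output, reconstructed from the emit strings in
 * TypeDefVisitor and StructVisitor below; abbreviated, not verbatim. */
typedef struct asdl_seq asdl_seq;   /* really declared in Include/asdl.h */

/* A sum whose constructors carry no fields becomes a plain enum. */
typedef enum _boolop { And=1, Or=2 } boolop_ty;
typedef enum _operator { Add=1, Sub=2 /* ... */ } operator_ty;

/* A sum with fields becomes a kind tag plus a union of field structs. */
typedef struct _expr *expr_ty;

enum _expr_kind { BoolOp_kind=1, BinOp_kind=2 /* ... */ };

struct _expr {
    enum _expr_kind kind;
    union {
        struct {
            boolop_ty op;
            asdl_seq *values;     /* 'expr* values' maps to a sequence */
        } BoolOp;
        struct {
            expr_ty left;
            operator_ty op;
            expr_ty right;
        } BinOp;
        /* ... one member per constructor ... */
    } v;
    int lineno;                   /* from 'attributes (int lineno, ...)' */
    int col_offset;
};
```

The real output additionally carries constructor prototypes (PrototypeVisitor) and arena-allocating constructor functions (FunctionVisitor), both visible further down in asdl_c.py.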
diff --git a/sys/src/cmd/python/Parser/Python.asdl b/sys/src/cmd/python/Parser/Python.asdl new file mode 100644 index 000000000..fa4b406df --- /dev/null +++ b/sys/src/cmd/python/Parser/Python.asdl @@ -0,0 +1,115 @@ +-- ASDL's five builtin types are identifier, int, string, object, bool + +module Python version "$Revision: 43614 $" +{ + mod = Module(stmt* body) + | Interactive(stmt* body) + | Expression(expr body) + + -- not really an actual node but useful in Jython's typesystem. + | Suite(stmt* body) + + stmt = FunctionDef(identifier name, arguments args, + stmt* body, expr* decorators) + | ClassDef(identifier name, expr* bases, stmt* body) + | Return(expr? value) + + | Delete(expr* targets) + | Assign(expr* targets, expr value) + | AugAssign(expr target, operator op, expr value) + + -- not sure if bool is allowed, can always use int + | Print(expr? dest, expr* values, bool nl) + + -- use 'orelse' because else is a keyword in target languages + | For(expr target, expr iter, stmt* body, stmt* orelse) + | While(expr test, stmt* body, stmt* orelse) + | If(expr test, stmt* body, stmt* orelse) + | With(expr context_expr, expr? optional_vars, stmt* body) + + -- 'type' is a bad name + | Raise(expr? type, expr? inst, expr? tback) + | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) + | TryFinally(stmt* body, stmt* finalbody) + | Assert(expr test, expr? msg) + + | Import(alias* names) + | ImportFrom(identifier module, alias* names, int? level) + + -- Doesn't capture requirement that locals must be + -- defined if globals is + -- still supports use as a function! + | Exec(expr body, expr? globals, expr? locals) + + | Global(identifier* names) + | Expr(expr value) + | Pass | Break | Continue + + -- XXX Jython will be different + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) + + -- BoolOp() can use left & right? + expr = BoolOp(boolop op, expr* values) + | BinOp(expr left, operator op, expr right) + | UnaryOp(unaryop op, expr operand) + | Lambda(arguments args, expr body) + | IfExp(expr test, expr body, expr orelse) + | Dict(expr* keys, expr* values) + | ListComp(expr elt, comprehension* generators) + | GeneratorExp(expr elt, comprehension* generators) + -- the grammar constrains where yield expressions can occur + | Yield(expr? value) + -- need sequences for compare to distinguish between + -- x < 4 < 3 and (x < 4) < 3 + | Compare(expr left, cmpop* ops, expr* comparators) + | Call(expr func, expr* args, keyword* keywords, + expr? starargs, expr? kwargs) + | Repr(expr value) + | Num(object n) -- a number as a PyObject. + | Str(string s) -- need to specify raw, unicode, etc? + -- other literals? bools? + + -- the following expression can appear in assignment context + | Attribute(expr value, identifier attr, expr_context ctx) + | Subscript(expr value, slice slice, expr_context ctx) + | Name(identifier id, expr_context ctx) + | List(expr* elts, expr_context ctx) + | Tuple(expr *elts, expr_context ctx) + + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) + + expr_context = Load | Store | Del | AugLoad | AugStore | Param + + slice = Ellipsis | Slice(expr? lower, expr? upper, expr? 
step) + | ExtSlice(slice* dims) + | Index(expr value) + + boolop = And | Or + + operator = Add | Sub | Mult | Div | Mod | Pow | LShift + | RShift | BitOr | BitXor | BitAnd | FloorDiv + + unaryop = Invert | Not | UAdd | USub + + cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn + + comprehension = (expr target, expr iter, expr* ifs) + + -- not sure what to call the first argument for raise and except + -- TODO(jhylton): Figure out if there is a better way to handle + -- lineno and col_offset fields, particularly when + -- ast is exposed to Python. + excepthandler = (expr? type, expr? name, stmt* body, int lineno, + int col_offset) + + arguments = (expr* args, identifier? vararg, + identifier? kwarg, expr* defaults) + + -- keyword arguments supplied to call + keyword = (identifier arg, expr value) + + -- import name with optional 'as' alias. + alias = (identifier name, identifier? asname) +} diff --git a/sys/src/cmd/python/Parser/acceler.c b/sys/src/cmd/python/Parser/acceler.c new file mode 100644 index 000000000..b41b2654f --- /dev/null +++ b/sys/src/cmd/python/Parser/acceler.c @@ -0,0 +1,125 @@ + +/* Parser accelerator module */ + +/* The parser as originally conceived had disappointing performance. + This module does some precomputation that speeds up the selection + of a DFA based upon a token, turning a search through an array + into a simple indexing operation. The parser now cannot work + without the accelerators installed. Note that the accelerators + are installed dynamically when the parser is initialized, they + are not part of the static data structure written on graminit.[ch] + by the parser generator. */ + +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "token.h" +#include "parser.h" + +/* Forward references */ +static void fixdfa(grammar *, dfa *); +static void fixstate(grammar *, state *); + +void +PyGrammar_AddAccelerators(grammar *g) +{ + dfa *d; + int i; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fixdfa(g, d); + g->g_accel = 1; +} + +void +PyGrammar_RemoveAccelerators(grammar *g) +{ + dfa *d; + int i; + g->g_accel = 0; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) { + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + if (s->s_accel) + PyObject_FREE(s->s_accel); + s->s_accel = NULL; + } + } +} + +static void +fixdfa(grammar *g, dfa *d) +{ + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fixstate(g, s); +} + +static void +fixstate(grammar *g, state *s) +{ + arc *a; + int k; + int *accel; + int nl = g->g_ll.ll_nlabels; + s->s_accept = 0; + accel = (int *) PyObject_MALLOC(nl * sizeof(int)); + if (accel == NULL) { + fprintf(stderr, "no mem to build parser accelerators\n"); + exit(1); + } + for (k = 0; k < nl; k++) + accel[k] = -1; + a = s->s_arc; + for (k = s->s_narcs; --k >= 0; a++) { + int lbl = a->a_lbl; + label *l = &g->g_ll.ll_label[lbl]; + int type = l->lb_type; + if (a->a_arrow >= (1 << 7)) { + printf("XXX too many states!\n"); + continue; + } + if (ISNONTERMINAL(type)) { + dfa *d1 = PyGrammar_FindDFA(g, type); + int ibit; + if (type - NT_OFFSET >= (1 << 7)) { + printf("XXX too high nonterminal number!\n"); + continue; + } + for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { + if (testbit(d1->d_first, ibit)) { + if (accel[ibit] != -1) + printf("XXX ambiguity!\n"); + accel[ibit] = a->a_arrow | (1 << 7) | + ((type - NT_OFFSET) << 8); + } + } + } + else if (lbl == EMPTY) + s->s_accept = 1; + else if (lbl >= 0 && lbl < nl) + accel[lbl] = 
a->a_arrow; + } + while (nl > 0 && accel[nl-1] == -1) + nl--; + for (k = 0; k < nl && accel[k] == -1;) + k++; + if (k < nl) { + int i; + s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); + if (s->s_accel == NULL) { + fprintf(stderr, "no mem to add parser accelerators\n"); + exit(1); + } + s->s_lower = k; + s->s_upper = nl; + for (i = 0; k < nl; i++, k++) + s->s_accel[i] = accel[k]; + } + PyObject_FREE(accel); +} diff --git a/sys/src/cmd/python/Parser/asdl.py b/sys/src/cmd/python/Parser/asdl.py new file mode 100644 index 000000000..bd892b695 --- /dev/null +++ b/sys/src/cmd/python/Parser/asdl.py @@ -0,0 +1,415 @@ +"""An implementation of the Zephyr Abstract Syntax Definition Language. + +See http://asdl.sourceforge.net/ and +http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html. + +Only supports top level module decl, not view. I'm guessing that view +is intended to support the browser and I'm not interested in the +browser. + +Changes for Python: Add support for module versions +""" + +#__metaclass__ = type + +import os +import traceback + +import spark + +class Token: + # spark seems to dispatch in the parser based on a token's + # type attribute + def __init__(self, type, lineno): + self.type = type + self.lineno = lineno + + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +class Id(Token): + def __init__(self, value, lineno): + self.type = 'Id' + self.value = value + self.lineno = lineno + + def __str__(self): + return self.value + +class String(Token): + def __init__(self, value, lineno): + self.type = 'String' + self.value = value + self.lineno = lineno + +class ASDLSyntaxError: + + def __init__(self, lineno, token=None, msg=None): + self.lineno = lineno + self.token = token + self.msg = msg + + def __str__(self): + if self.msg is None: + return "Error at '%s', line %d" % (self.token, self.lineno) + else: + return "%s, line %d" % (self.msg, self.lineno) + +class ASDLScanner(spark.GenericScanner, object): + + def tokenize(self, input): + self.rv = [] + self.lineno = 1 + super(ASDLScanner, self).tokenize(input) + return self.rv + + def t_id(self, s): + r"[\w\.]+" + # XXX doesn't distinguish upper vs. lower, which is + # significant for ASDL. + self.rv.append(Id(s, self.lineno)) + + def t_string(self, s): + r'"[^"]*"' + self.rv.append(String(s, self.lineno)) + + def t_xxx(self, s): # not sure what this production means + r"<=" + self.rv.append(Token(s, self.lineno)) + + def t_punctuation(self, s): + r"[\{\}\*\=\|\(\)\,\?\:]" + self.rv.append(Token(s, self.lineno)) + + def t_comment(self, s): + r"\-\-[^\n]*" + pass + + def t_newline(self, s): + r"\n" + self.lineno += 1 + + def t_whitespace(self, s): + r"[ \t]+" + pass + + def t_default(self, s): + r" . 
+" + raise ValueError, "unmatched input: %s" % `s` + +class ASDLParser(spark.GenericParser, object): + def __init__(self): + super(ASDLParser, self).__init__("module") + + def typestring(self, tok): + return tok.type + + def error(self, tok): + raise ASDLSyntaxError(tok.lineno, tok) + + def p_module_0(self, (module, name, version, _0, _1)): + " module ::= Id Id version { } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, None, version) + + def p_module(self, (module, name, version, _0, definitions, _1)): + " module ::= Id Id version { definitions } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, definitions, version) + + def p_version(self, (version, V)): + "version ::= Id String" + if version.value != "version": + raise ASDLSyntaxError(version.lineno, + msg="expected 'version', found %" % version) + return V + + def p_definition_0(self, (definition,)): + " definitions ::= definition " + return definition + + def p_definition_1(self, (definitions, definition)): + " definitions ::= definition definitions " + return definitions + definition + + def p_definition(self, (id, _, type)): + " definition ::= Id = type " + return [Type(id, type)] + + def p_type_0(self, (product,)): + " type ::= product " + return product + + def p_type_1(self, (sum,)): + " type ::= sum " + return Sum(sum) + + def p_type_2(self, (sum, id, _0, attributes, _1)): + " type ::= sum Id ( fields ) " + if id.value != "attributes": + raise ASDLSyntaxError(id.lineno, + msg="expected attributes, found %s" % id) + if attributes: + attributes.reverse() + return Sum(sum, attributes) + + def p_product(self, (_0, fields, _1)): + " product ::= ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Product(fields) + + def p_sum_0(self, (constructor,)): + " sum ::= constructor """ + return [constructor] + + def p_sum_1(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_sum_2(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_constructor_0(self, (id,)): + " constructor ::= Id " + return Constructor(id) + + def p_constructor_1(self, (id, _0, fields, _1)): + " constructor ::= Id ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Constructor(id, fields) + + def p_fields_0(self, (field,)): + " fields ::= field " + return [field] + + def p_fields_1(self, (field, _, fields)): + " fields ::= field , fields " + return fields + [field] + + def p_field_0(self, (type,)): + " field ::= Id " + return Field(type) + + def p_field_1(self, (type, name)): + " field ::= Id Id " + return Field(type, name) + + def p_field_2(self, (type, _, name)): + " field ::= Id * Id " + return Field(type, name, seq=1) + + def p_field_3(self, (type, _, name)): + " field ::= Id ? Id " + return Field(type, name, opt=1) + + def p_field_4(self, (type, _)): + " field ::= Id * " + return Field(type, seq=1) + + def p_field_5(self, (type, _)): + " field ::= Id ? 
" + return Field(type, opt=1) + +builtin_types = ("identifier", "string", "int", "bool", "object") + +# below is a collection of classes to capture the AST of an AST :-) +# not sure if any of the methods are useful yet, but I'm adding them +# piecemeal as they seem helpful + +class AST: + pass # a marker class + +class Module(AST): + def __init__(self, name, dfns, version): + self.name = name + self.dfns = dfns + self.version = version + self.types = {} # maps type name to value (from dfns) + for type in dfns: + self.types[type.name.value] = type.value + + def __repr__(self): + return "Module(%s, %s)" % (self.name, self.dfns) + +class Type(AST): + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return "Type(%s, %s)" % (self.name, self.value) + +class Constructor(AST): + def __init__(self, name, fields=None): + self.name = name + self.fields = fields or [] + + def __repr__(self): + return "Constructor(%s, %s)" % (self.name, self.fields) + +class Field(AST): + def __init__(self, type, name=None, seq=0, opt=0): + self.type = type + self.name = name + self.seq = seq + self.opt = opt + + def __repr__(self): + if self.seq: + extra = ", seq=1" + elif self.opt: + extra = ", opt=1" + else: + extra = "" + if self.name is None: + return "Field(%s%s)" % (self.type, extra) + else: + return "Field(%s, %s%s)" % (self.type, self.name, extra) + +class Sum(AST): + def __init__(self, types, attributes=None): + self.types = types + self.attributes = attributes or [] + + def __repr__(self): + if self.attributes is None: + return "Sum(%s)" % self.types + else: + return "Sum(%s, %s)" % (self.types, self.attributes) + +class Product(AST): + def __init__(self, fields): + self.fields = fields + + def __repr__(self): + return "Product(%s)" % self.fields + +class VisitorBase(object): + + def __init__(self, skip=0): + self.cache = {} + self.skip = skip + + def visit(self, object, *args): + meth = self._dispatch(object) + if meth is None: + return + try: + meth(object, *args) + except Exception, err: + print "Error visiting", repr(object) + print err + traceback.print_exc() + # XXX hack + if hasattr(self, 'file'): + self.file.flush() + os._exit(1) + + def _dispatch(self, object): + assert isinstance(object, AST), repr(object) + klass = object.__class__ + meth = self.cache.get(klass) + if meth is None: + methname = "visit" + klass.__name__ + if self.skip: + meth = getattr(self, methname, None) + else: + meth = getattr(self, methname) + self.cache[klass] = meth + return meth + +class Check(VisitorBase): + + def __init__(self): + super(Check, self).__init__(skip=1) + self.cons = {} + self.errors = 0 + self.types = {} + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, str(type.name)) + + def visitSum(self, sum, name): + for t in sum.types: + self.visit(t, name) + + def visitConstructor(self, cons, name): + key = str(cons.name) + conflict = self.cons.get(key) + if conflict is None: + self.cons[key] = name + else: + print "Redefinition of constructor %s" % key + print "Defined in %s and %s" % (conflict, name) + self.errors += 1 + for f in cons.fields: + self.visit(f, key) + + def visitField(self, field, name): + key = str(field.type) + l = self.types.setdefault(key, []) + l.append(name) + + def visitProduct(self, prod, name): + for f in prod.fields: + self.visit(f, name) + +def check(mod): + v = Check() + v.visit(mod) + + for t in v.types: + if not mod.types.has_key(t) and not t in builtin_types: + 
v.errors += 1 + uses = ", ".join(v.types[t]) + print "Undefined type %s, used in %s" % (t, uses) + + return not v.errors + +def parse(file): + scanner = ASDLScanner() + parser = ASDLParser() + + buf = open(file).read() + tokens = scanner.tokenize(buf) + try: + return parser.parse(tokens) + except ASDLSyntaxError, err: + print err + lines = buf.split("\n") + print lines[err.lineno - 1] # lines starts at 0, files at 1 + +if __name__ == "__main__": + import glob + import sys + + if len(sys.argv) > 1: + files = sys.argv[1:] + else: + testdir = "tests" + files = glob.glob(testdir + "/*.asdl") + + for file in files: + print file + mod = parse(file) + print "module", mod.name + print len(mod.dfns), "definitions" + if not check(mod): + print "Check failed" + else: + for dfn in mod.dfns: + print dfn.type diff --git a/sys/src/cmd/python/Parser/asdl_c.py b/sys/src/cmd/python/Parser/asdl_c.py new file mode 100755 index 000000000..2974d7b15 --- /dev/null +++ b/sys/src/cmd/python/Parser/asdl_c.py @@ -0,0 +1,782 @@ +#! /usr/bin/env python +"""Generate C code from an ASDL description.""" + +# TO DO +# handle fields that have a type but no name + +import os, sys, traceback + +import asdl + +TABSIZE = 8 +MAX_COL = 80 + +def get_c_type(name): + """Return a string for the C name of the type. + + This function special cases the default types provided by asdl: + identifier, string, int, bool. + """ + # XXX ack! need to figure out where Id is useful and where string + if isinstance(name, asdl.Id): + name = name.value + if name in asdl.builtin_types: + return name + else: + return "%s_ty" % name + +def reflow_lines(s, depth): + """Reflow the line s indented depth tabs. + + Return a sequence of lines where no line extends beyond MAX_COL + when properly indented. The first line is properly indented based + exclusively on depth * TABSIZE. All following lines -- these are + the reflowed lines generated by this function -- start at the same + column as the first character beyond the opening { in the first + line. + """ + size = MAX_COL - depth * TABSIZE + if len(s) < size: + return [s] + + lines = [] + cur = s + padding = "" + while len(cur) > size: + i = cur.rfind(' ', 0, size) + # XXX this should be fixed for real + if i == -1 and 'GeneratorExp' in cur: + i = size + 3 + assert i != -1, "Impossible line %d to reflow: %s" % (size, `s`) + lines.append(padding + cur[:i]) + if len(lines) == 1: + # find new size based on brace + j = cur.find('{', 0, i) + if j >= 0: + j += 2 # account for the brace and the space after it + size -= j + padding = " " * j + else: + j = cur.find('(', 0, i) + if j >= 0: + j += 1 # account for the paren (no space after it) + size -= j + padding = " " * j + cur = cur[i+1:] + else: + lines.append(padding + cur) + return lines + +def is_simple(sum): + """Return True if a sum is a simple. + + A sum is simple if its types have no fields, e.g. + unaryop = Invert | Not | UAdd | USub + """ + + for t in sum.types: + if t.fields: + return False + return True + +class EmitVisitor(asdl.VisitorBase): + """Visit that emits lines""" + + def __init__(self, file): + self.file = file + super(EmitVisitor, self).__init__() + + def emit(self, s, depth, reflow=1): + # XXX reflow long lines? 
+ if reflow: + lines = reflow_lines(s, depth) + else: + lines = [s] + for line in lines: + line = (" " * TABSIZE * depth) + line + "\n" + self.file.write(line) + +class TypeDefVisitor(EmitVisitor): + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if is_simple(sum): + self.simple_sum(sum, name, depth) + else: + self.sum_with_constructors(sum, name, depth) + + def simple_sum(self, sum, name, depth): + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s=%d" % (type.name, i + 1)) + enums = ", ".join(enum) + ctype = get_c_type(name) + s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) + self.emit(s, depth) + self.emit("", depth) + + def sum_with_constructors(self, sum, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + + def visitProduct(self, product, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + +class StructVisitor(EmitVisitor): + """Visitor to generate typdefs for AST.""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if not is_simple(sum): + self.sum_with_constructors(sum, name, depth) + + def sum_with_constructors(self, sum, name, depth): + def emit(s, depth=depth): + self.emit(s % sys._getframe(1).f_locals, depth) + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s_kind=%d" % (type.name, i + 1)) + + emit("enum _%(name)s_kind {" + ", ".join(enum) + "};") + + emit("struct _%(name)s {") + emit("enum _%(name)s_kind kind;", depth + 1) + emit("union {", depth + 1) + for t in sum.types: + self.visit(t, depth + 2) + emit("} v;", depth + 1) + for field in sum.attributes: + # rudimentary attribute handling + type = str(field.type) + assert type in asdl.builtin_types, type + emit("%s %s;" % (type, field.name), depth + 1); + emit("};") + emit("") + + def visitConstructor(self, cons, depth): + if cons.fields: + self.emit("struct {", depth) + for f in cons.fields: + self.visit(f, depth + 1) + self.emit("} %s;" % cons.name, depth) + self.emit("", depth) + else: + # XXX not sure what I want here, nothing is probably fine + pass + + def visitField(self, field, depth): + # XXX need to lookup field.type, because it might be something + # like a builtin... 
+ ctype = get_c_type(field.type) + name = field.name + if field.seq: + if field.type.value in ('cmpop',): + self.emit("asdl_int_seq *%(name)s;" % locals(), depth) + else: + self.emit("asdl_seq *%(name)s;" % locals(), depth) + else: + self.emit("%(ctype)s %(name)s;" % locals(), depth) + + def visitProduct(self, product, name, depth): + self.emit("struct _%(name)s {" % locals(), depth) + for f in product.fields: + self.visit(f, depth + 1) + self.emit("};", depth) + self.emit("", depth) + +class PrototypeVisitor(EmitVisitor): + """Generate function prototypes for the .h file""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + if is_simple(sum): + pass # XXX + else: + for t in sum.types: + self.visit(t, name, sum.attributes) + + def get_args(self, fields): + """Return list of C argument into, one for each field. + + Argument info is 3-tuple of a C type, variable name, and flag + that is true if type can be NULL. + """ + args = [] + unnamed = {} + for f in fields: + if f.name is None: + name = f.type + c = unnamed[name] = unnamed.get(name, 0) + 1 + if c > 1: + name = "name%d" % (c - 1) + else: + name = f.name + # XXX should extend get_c_type() to handle this + if f.seq: + if f.type.value in ('cmpop',): + ctype = "asdl_int_seq *" + else: + ctype = "asdl_seq *" + else: + ctype = get_c_type(f.type) + args.append((ctype, name, f.opt or f.seq)) + return args + + def visitConstructor(self, cons, type, attrs): + args = self.get_args(cons.fields) + attrs = self.get_args(attrs) + ctype = get_c_type(type) + self.emit_function(cons.name, ctype, args, attrs) + + def emit_function(self, name, ctype, args, attrs, union=1): + args = args + attrs + if args: + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args]) + argstr += ", PyArena *arena" + else: + argstr = "PyArena *arena" + margs = "a0" + for i in range(1, len(args)+1): + margs += ", a%d" % i + self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, + reflow = 0) + self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0) + + def visitProduct(self, prod, name): + self.emit_function(name, get_c_type(name), + self.get_args(prod.fields), [], union=0) + +class FunctionVisitor(PrototypeVisitor): + """Visitor to generate constructor functions for AST.""" + + def emit_function(self, name, ctype, args, attrs, union=1): + def emit(s, depth=0, reflow=1): + self.emit(s, depth, reflow) + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args + attrs]) + if argstr: + argstr += ", PyArena *arena" + else: + argstr = "PyArena *arena" + self.emit("%s" % ctype, 0) + emit("%s(%s)" % (name, argstr)) + emit("{") + emit("%s p;" % ctype, 1) + for argtype, argname, opt in args: + # XXX hack alert: false is allowed for a bool + if not opt and not (argtype == "bool" or argtype == "int"): + emit("if (!%s) {" % argname, 1) + emit("PyErr_SetString(PyExc_ValueError,", 2) + msg = "field %s is required for %s" % (argname, name) + emit(' "%s");' % msg, + 2, reflow=0) + emit('return NULL;', 2) + emit('}', 1) + + emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); + emit("if (!p) {", 1) + emit("PyErr_NoMemory();", 2) + emit("return NULL;", 2) + emit("}", 1) + if union: + self.emit_body_union(name, args, attrs) + else: + self.emit_body_struct(name, args, attrs) + emit("return p;", 1) + emit("}") + emit("") + + def emit_body_union(self, name, args, attrs): + def emit(s, depth=0, reflow=1): + 
self.emit(s, depth, reflow) + emit("p->kind = %s_kind;" % name, 1) + for argtype, argname, opt in args: + emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) + for argtype, argname, opt in attrs: + emit("p->%s = %s;" % (argname, argname), 1) + + def emit_body_struct(self, name, args, attrs): + def emit(s, depth=0, reflow=1): + self.emit(s, depth, reflow) + for argtype, argname, opt in args: + emit("p->%s = %s;" % (argname, argname), 1) + assert not attrs + +class PickleVisitor(EmitVisitor): + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + pass + + def visitProduct(self, sum, name): + pass + + def visitConstructor(self, cons, name): + pass + + def visitField(self, sum): + pass + +class MarshalPrototypeVisitor(PickleVisitor): + + def prototype(self, sum, name): + ctype = get_c_type(name) + self.emit("static int marshal_write_%s(PyObject **, int *, %s);" + % (name, ctype), 0) + + visitProduct = visitSum = prototype + +class PyTypesDeclareVisitor(PickleVisitor): + + def visitProduct(self, prod, name): + self.emit("static PyTypeObject *%s_type;" % name, 0) + self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) + if prod.fields: + self.emit("static char *%s_fields[]={" % name,0) + for f in prod.fields: + self.emit('"%s",' % f.name, 1) + self.emit("};", 0) + + def visitSum(self, sum, name): + self.emit("static PyTypeObject *%s_type;" % name, 0) + if sum.attributes: + self.emit("static char *%s_attributes[] = {" % name, 0) + for a in sum.attributes: + self.emit('"%s",' % a.name, 1) + self.emit("};", 0) + ptype = "void*" + if is_simple(sum): + ptype = get_c_type(name) + tnames = [] + for t in sum.types: + tnames.append(str(t.name)+"_singleton") + tnames = ", *".join(tnames) + self.emit("static PyObject *%s;" % tnames, 0) + self.emit("static PyObject* ast2obj_%s(%s);" % (name, ptype), 0) + for t in sum.types: + self.visitConstructor(t, name) + + def visitConstructor(self, cons, name): + self.emit("static PyTypeObject *%s_type;" % cons.name, 0) + if cons.fields: + self.emit("static char *%s_fields[]={" % cons.name, 0) + for t in cons.fields: + self.emit('"%s",' % t.name, 1) + self.emit("};",0) + +class PyTypesVisitor(PickleVisitor): + + def visitModule(self, mod): + self.emit(""" +static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields) +{ + PyObject *fnames, *result; + int i; + if (num_fields) { + fnames = PyTuple_New(num_fields); + if (!fnames) return NULL; + } else { + fnames = Py_None; + Py_INCREF(Py_None); + } + for(i=0; i < num_fields; i++) { + PyObject *field = PyString_FromString(fields[i]); + if (!field) { + Py_DECREF(fnames); + return NULL; + } + PyTuple_SET_ITEM(fnames, i, field); + } + result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}", + type, base, "_fields", fnames, "__module__", "_ast"); + Py_DECREF(fnames); + return (PyTypeObject*)result; +} + +static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) +{ + int i, result; + PyObject *s, *l = PyList_New(num_fields); + if (!l) return 0; + for(i = 0; i < num_fields; i++) { + s = PyString_FromString(attrs[i]); + if (!s) { + Py_DECREF(l); + return 0; + } + PyList_SET_ITEM(l, i, s); + } + result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0; + Py_DECREF(l); + return result; +} + +static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*)) +{ + int i, n = asdl_seq_LEN(seq); + PyObject *result = 
PyList_New(n); + PyObject *value; + if (!result) + return NULL; + for (i = 0; i < n; i++) { + value = func(asdl_seq_GET(seq, i)); + if (!value) { + Py_DECREF(result); + return NULL; + } + PyList_SET_ITEM(result, i, value); + } + return result; +} + +static PyObject* ast2obj_object(void *o) +{ + if (!o) + o = Py_None; + Py_INCREF((PyObject*)o); + return (PyObject*)o; +} +#define ast2obj_identifier ast2obj_object +#define ast2obj_string ast2obj_object +static PyObject* ast2obj_bool(bool b) +{ + return PyBool_FromLong(b); +} + +static PyObject* ast2obj_int(bool b) +{ + return PyInt_FromLong(b); +} +""", 0, reflow=False) + + self.emit("static int init_types(void)",0) + self.emit("{", 0) + self.emit("static int initialized;", 1) + self.emit("if (initialized) return 1;", 1) + self.emit('AST_type = make_type("AST", &PyBaseObject_Type, NULL, 0);', 1) + for dfn in mod.dfns: + self.visit(dfn) + self.emit("initialized = 1;", 1) + self.emit("return 1;", 1); + self.emit("}", 0) + + def visitProduct(self, prod, name): + if prod.fields: + fields = name.value+"_fields" + else: + fields = "NULL" + self.emit('%s_type = make_type("%s", AST_type, %s, %d);' % + (name, name, fields, len(prod.fields)), 1) + self.emit("if (!%s_type) return 0;" % name, 1) + + def visitSum(self, sum, name): + self.emit('%s_type = make_type("%s", AST_type, NULL, 0);' % (name, name), 1) + self.emit("if (!%s_type) return 0;" % name, 1) + if sum.attributes: + self.emit("if (!add_attributes(%s_type, %s_attributes, %d)) return 0;" % + (name, name, len(sum.attributes)), 1) + else: + self.emit("if (!add_attributes(%s_type, NULL, 0)) return 0;" % name, 1) + simple = is_simple(sum) + for t in sum.types: + self.visitConstructor(t, name, simple) + + def visitConstructor(self, cons, name, simple): + if cons.fields: + fields = cons.name.value+"_fields" + else: + fields = "NULL" + self.emit('%s_type = make_type("%s", %s_type, %s, %d);' % + (cons.name, cons.name, name, fields, len(cons.fields)), 1) + self.emit("if (!%s_type) return 0;" % cons.name, 1) + if simple: + self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" % + (cons.name, cons.name), 1) + self.emit("if (!%s_singleton) return 0;" % cons.name, 1) + +class ASTModuleVisitor(PickleVisitor): + + def visitModule(self, mod): + self.emit("PyMODINIT_FUNC", 0) + self.emit("init_ast(void)", 0) + self.emit("{", 0) + self.emit("PyObject *m, *d;", 1) + self.emit("if (!init_types()) return;", 1) + self.emit('m = Py_InitModule3("_ast", NULL, NULL);', 1) + self.emit("if (!m) return;", 1) + self.emit("d = PyModule_GetDict(m);", 1) + self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)AST_type) < 0) return;', 1) + self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) + self.emit("return;", 2) + # Value of version: "$Revision: 53490 $" + self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' % mod.version.value[12:-3], 1) + self.emit("return;", 2) + for dfn in mod.dfns: + self.visit(dfn) + self.emit("}", 0) + + def visitProduct(self, prod, name): + self.addObj(name) + + def visitSum(self, sum, name): + self.addObj(name) + for t in sum.types: + self.visitConstructor(t, name) + + def visitConstructor(self, cons, name): + self.addObj(cons.name) + + def addObj(self, name): + self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1) + +_SPECIALIZED_SEQUENCES = ('stmt', 'expr') + +def find_sequence(fields, doing_specialization): + """Return True if any field uses a sequence.""" + for f in fields: + if f.seq: 
+ if not doing_specialization: + return True + if str(f.type) not in _SPECIALIZED_SEQUENCES: + return True + return False + +def has_sequence(types, doing_specialization): + for t in types: + if find_sequence(t.fields, doing_specialization): + return True + return False + + +class StaticVisitor(PickleVisitor): + CODE = '''Very simple, always emit this static code. Overide CODE''' + + def visit(self, object): + self.emit(self.CODE, 0, reflow=False) + +class ObjVisitor(PickleVisitor): + + def func_begin(self, name): + ctype = get_c_type(name) + self.emit("PyObject*", 0) + self.emit("ast2obj_%s(void* _o)" % (name), 0) + self.emit("{", 0) + self.emit("%s o = (%s)_o;" % (ctype, ctype), 1) + self.emit("PyObject *result = NULL, *value = NULL;", 1) + self.emit('if (!o) {', 1) + self.emit("Py_INCREF(Py_None);", 2) + self.emit('return Py_None;', 2) + self.emit("}", 1) + self.emit('', 0) + + def func_end(self): + self.emit("return result;", 1) + self.emit("failed:", 0) + self.emit("Py_XDECREF(value);", 1) + self.emit("Py_XDECREF(result);", 1) + self.emit("return NULL;", 1) + self.emit("}", 0) + self.emit("", 0) + + def visitSum(self, sum, name): + if is_simple(sum): + self.simpleSum(sum, name) + return + self.func_begin(name) + self.emit("switch (o->kind) {", 1) + for i in range(len(sum.types)): + t = sum.types[i] + self.visitConstructor(t, i + 1, name) + self.emit("}", 1) + for a in sum.attributes: + self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1) + self.emit("if (!value) goto failed;", 1) + self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1) + self.emit('goto failed;', 2) + self.emit('Py_DECREF(value);', 1) + self.func_end() + + def simpleSum(self, sum, name): + self.emit("PyObject* ast2obj_%s(%s_ty o)" % (name, name), 0) + self.emit("{", 0) + self.emit("switch(o) {", 1) + for t in sum.types: + self.emit("case %s:" % t.name, 2) + self.emit("Py_INCREF(%s_singleton);" % t.name, 3) + self.emit("return %s_singleton;" % t.name, 3) + self.emit("}", 1) + self.emit("return NULL; /* cannot happen */", 1) + self.emit("}", 0) + + def visitProduct(self, prod, name): + self.func_begin(name) + self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1); + self.emit("if (!result) return NULL;", 1) + for field in prod.fields: + self.visitField(field, name, 1, True) + self.func_end() + + def visitConstructor(self, cons, enum, name): + self.emit("case %s_kind:" % cons.name, 1) + self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2); + self.emit("if (!result) goto failed;", 2) + for f in cons.fields: + self.visitField(f, cons.name, 2, False) + self.emit("break;", 2) + + def visitField(self, field, name, depth, product): + def emit(s, d): + self.emit(s, depth + d) + if product: + value = "o->%s" % field.name + else: + value = "o->v.%s.%s" % (name, field.name) + self.set(field, value, depth) + emit("if (!value) goto failed;", 0) + emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) + emit("goto failed;", 1) + emit("Py_DECREF(value);", 0) + + def emitSeq(self, field, value, depth, emit): + emit("seq = %s;" % value, 0) + emit("n = asdl_seq_LEN(seq);", 0) + emit("value = PyList_New(n);", 0) + emit("if (!value) goto failed;", 0) + emit("for (i = 0; i < n; i++) {", 0) + self.set("value", field, "asdl_seq_GET(seq, i)", depth + 1) + emit("if (!value1) goto failed;", 1) + emit("PyList_SET_ITEM(value, i, value1);", 1) + emit("value1 = NULL;", 1) + emit("}", 0) + + def set(self, field, value, depth): + if field.seq: + # XXX 
should really check for is_simple, but that requires a symbol table + if field.type.value == "cmpop": + # While the sequence elements are stored as void*, + # ast2obj_cmpop expects an enum + self.emit("{", depth) + self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1) + self.emit("value = PyList_New(n);", depth+1) + self.emit("if (!value) goto failed;", depth+1) + self.emit("for(i = 0; i < n; i++)", depth+1) + # This cannot fail, so no need for error handling + self.emit("PyList_SET_ITEM(value, i, ast2obj_cmpop((cmpop_ty)asdl_seq_GET(%s, i)));" % value, + depth+2, reflow=False) + self.emit("}", depth) + else: + self.emit("value = ast2obj_list(%s, ast2obj_%s);" % (value, field.type), depth) + else: + ctype = get_c_type(field.type) + self.emit("value = ast2obj_%s(%s);" % (field.type, value), depth, reflow=False) + + +class PartingShots(StaticVisitor): + + CODE = """ +PyObject* PyAST_mod2obj(mod_ty t) +{ + init_types(); + return ast2obj_mod(t); +} +""" + +class ChainOfVisitors: + def __init__(self, *visitors): + self.visitors = visitors + + def visit(self, object): + for v in self.visitors: + v.visit(object) + v.emit("", 0) + +def main(srcfile): + argv0 = sys.argv[0] + components = argv0.split(os.sep) + argv0 = os.sep.join(components[-2:]) + auto_gen_msg = '/* File automatically generated by %s */\n' % argv0 + mod = asdl.parse(srcfile) + if not asdl.check(mod): + sys.exit(1) + if INC_DIR: + p = "%s/%s-ast.h" % (INC_DIR, mod.name) + f = open(p, "wb") + print >> f, auto_gen_msg + print >> f, '#include "asdl.h"\n' + c = ChainOfVisitors(TypeDefVisitor(f), + StructVisitor(f), + PrototypeVisitor(f), + ) + c.visit(mod) + print >>f, "PyObject* PyAST_mod2obj(mod_ty t);" + f.close() + + if SRC_DIR: + p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") + f = open(p, "wb") + print >> f, auto_gen_msg + print >> f, '#include "Python.h"' + print >> f, '#include "%s-ast.h"' % mod.name + print >> f + print >>f, "static PyTypeObject* AST_type;" + v = ChainOfVisitors( + PyTypesDeclareVisitor(f), + PyTypesVisitor(f), + FunctionVisitor(f), + ObjVisitor(f), + ASTModuleVisitor(f), + PartingShots(f), + ) + v.visit(mod) + f.close() + +if __name__ == "__main__": + import sys + import getopt + + INC_DIR = '' + SRC_DIR = '' + opts, args = getopt.getopt(sys.argv[1:], "h:c:") + if len(opts) != 1: + print "Must specify exactly one output file" + sys.exit(1) + for o, v in opts: + if o == '-h': + INC_DIR = v + if o == '-c': + SRC_DIR = v + if len(args) != 1: + print "Must specify single input file" + sys.exit(1) + main(args[0]) diff --git a/sys/src/cmd/python/Parser/bitset.c b/sys/src/cmd/python/Parser/bitset.c new file mode 100644 index 000000000..b5543b81d --- /dev/null +++ b/sys/src/cmd/python/Parser/bitset.c @@ -0,0 +1,66 @@ + +/* Bitset primitives used by the parser generator */ + +#include "pgenheaders.h" +#include "bitset.h" + +bitset +newbitset(int nbits) +{ + int nbytes = NBYTES(nbits); + bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); + + if (ss == NULL) + Py_FatalError("no mem for bitset"); + + ss += nbytes; + while (--nbytes >= 0) + *--ss = 0; + return ss; +} + +void +delbitset(bitset ss) +{ + PyObject_FREE(ss); +} + +int +addbit(bitset ss, int ibit) +{ + int ibyte = BIT2BYTE(ibit); + BYTE mask = BIT2MASK(ibit); + + if (ss[ibyte] & mask) + return 0; /* Bit already set */ + ss[ibyte] |= mask; + return 1; +} + +#if 0 /* Now a macro */ +int +testbit(bitset ss, int ibit) +{ + return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; +} +#endif + +int +samebitset(bitset ss1, bitset ss2, int nbits) +{ + int 
i; + + for (i = NBYTES(nbits); --i >= 0; ) + if (*ss1++ != *ss2++) + return 0; + return 1; +} + +void +mergebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + *ss1++ |= *ss2++; +} diff --git a/sys/src/cmd/python/Parser/firstsets.c b/sys/src/cmd/python/Parser/firstsets.c new file mode 100644 index 000000000..00467b31a --- /dev/null +++ b/sys/src/cmd/python/Parser/firstsets.c @@ -0,0 +1,113 @@ + +/* Computation of FIRST stets */ + +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +extern int Py_DebugFlag; + +/* Forward */ +static void calcfirstset(grammar *, dfa *); + +void +addfirstsets(grammar *g) +{ + int i; + dfa *d; + + if (Py_DebugFlag) + printf("Adding FIRST sets ...\n"); + for (i = 0; i < g->g_ndfas; i++) { + d = &g->g_dfa[i]; + if (d->d_first == NULL) + calcfirstset(g, d); + } +} + +static void +calcfirstset(grammar *g, dfa *d) +{ + int i, j; + state *s; + arc *a; + int nsyms; + int *sym; + int nbits; + static bitset dummy; + bitset result; + int type; + dfa *d1; + label *l0; + + if (Py_DebugFlag) + printf("Calculate FIRST set for '%s'\n", d->d_name); + + if (dummy == NULL) + dummy = newbitset(1); + if (d->d_first == dummy) { + fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); + return; + } + if (d->d_first != NULL) { + fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", + d->d_name); + } + d->d_first = dummy; + + l0 = g->g_ll.ll_label; + nbits = g->g_ll.ll_nlabels; + result = newbitset(nbits); + + sym = (int *)PyObject_MALLOC(sizeof(int)); + if (sym == NULL) + Py_FatalError("no mem for new sym in calcfirstset"); + nsyms = 1; + sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); + + s = &d->d_state[d->d_initial]; + for (i = 0; i < s->s_narcs; i++) { + a = &s->s_arc[i]; + for (j = 0; j < nsyms; j++) { + if (sym[j] == a->a_lbl) + break; + } + if (j >= nsyms) { /* New label */ + sym = (int *)PyObject_REALLOC(sym, + sizeof(int) * (nsyms + 1)); + if (sym == NULL) + Py_FatalError( + "no mem to resize sym in calcfirstset"); + sym[nsyms++] = a->a_lbl; + type = l0[a->a_lbl].lb_type; + if (ISNONTERMINAL(type)) { + d1 = PyGrammar_FindDFA(g, type); + if (d1->d_first == dummy) { + fprintf(stderr, + "Left-recursion below '%s'\n", + d->d_name); + } + else { + if (d1->d_first == NULL) + calcfirstset(g, d1); + mergebitset(result, + d1->d_first, nbits); + } + } + else if (ISTERMINAL(type)) { + addbit(result, a->a_lbl); + } + } + } + d->d_first = result; + if (Py_DebugFlag) { + printf("FIRST set for '%s': {", d->d_name); + for (i = 0; i < nbits; i++) { + if (testbit(result, i)) + printf(" %s", PyGrammar_LabelRepr(&l0[i])); + } + printf(" }\n"); + } + + PyObject_FREE(sym); +} diff --git a/sys/src/cmd/python/Parser/grammar.c b/sys/src/cmd/python/Parser/grammar.c new file mode 100644 index 000000000..9e7c49aab --- /dev/null +++ b/sys/src/cmd/python/Parser/grammar.c @@ -0,0 +1,254 @@ + +/* Grammar implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include <ctype.h> + +#include "token.h" +#include "grammar.h" + +#ifdef RISCOS +#include <unixlib.h> +#endif + +extern int Py_DebugFlag; + +grammar * +newgrammar(int start) +{ + grammar *g; + + g = (grammar *)PyObject_MALLOC(sizeof(grammar)); + if (g == NULL) + Py_FatalError("no mem for new grammar"); + g->g_ndfas = 0; + g->g_dfa = NULL; + g->g_start = start; + g->g_ll.ll_nlabels = 0; + g->g_ll.ll_label = NULL; + g->g_accel = 0; + return g; +} + +dfa * +adddfa(grammar *g, int type, char *name) +{ + dfa *d; + + g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, + 
sizeof(dfa) * (g->g_ndfas + 1)); + if (g->g_dfa == NULL) + Py_FatalError("no mem to resize dfa in adddfa"); + d = &g->g_dfa[g->g_ndfas++]; + d->d_type = type; + d->d_name = strdup(name); + d->d_nstates = 0; + d->d_state = NULL; + d->d_initial = -1; + d->d_first = NULL; + return d; /* Only use while fresh! */ +} + +int +addstate(dfa *d) +{ + state *s; + + d->d_state = (state *)PyObject_REALLOC(d->d_state, + sizeof(state) * (d->d_nstates + 1)); + if (d->d_state == NULL) + Py_FatalError("no mem to resize state in addstate"); + s = &d->d_state[d->d_nstates++]; + s->s_narcs = 0; + s->s_arc = NULL; + s->s_lower = 0; + s->s_upper = 0; + s->s_accel = NULL; + s->s_accept = 0; + return s - d->d_state; +} + +void +addarc(dfa *d, int from, int to, int lbl) +{ + state *s; + arc *a; + + assert(0 <= from && from < d->d_nstates); + assert(0 <= to && to < d->d_nstates); + + s = &d->d_state[from]; + s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); + if (s->s_arc == NULL) + Py_FatalError("no mem to resize arc list in addarc"); + a = &s->s_arc[s->s_narcs++]; + a->a_lbl = lbl; + a->a_arrow = to; +} + +int +addlabel(labellist *ll, int type, char *str) +{ + int i; + label *lb; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type && + strcmp(ll->ll_label[i].lb_str, str) == 0) + return i; + } + ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, + sizeof(label) * (ll->ll_nlabels + 1)); + if (ll->ll_label == NULL) + Py_FatalError("no mem to resize labellist in addlabel"); + lb = &ll->ll_label[ll->ll_nlabels++]; + lb->lb_type = type; + lb->lb_str = strdup(str); + if (Py_DebugFlag) + printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, + PyGrammar_LabelRepr(lb)); + return lb - ll->ll_label; +} + +/* Same, but rather dies than adds */ + +int +findlabel(labellist *ll, int type, char *str) +{ + int i; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type /*&& + strcmp(ll->ll_label[i].lb_str, str) == 0*/) + return i; + } + fprintf(stderr, "Label %d/'%s' not found\n", type, str); + Py_FatalError("grammar.c:findlabel()"); + return 0; /* Make gcc -Wall happy */ +} + +/* Forward */ +static void translabel(grammar *, label *); + +void +translatelabels(grammar *g) +{ + int i; + +#ifdef Py_DEBUG + printf("Translating labels ...\n"); +#endif + /* Don't translate EMPTY */ + for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) + translabel(g, &g->g_ll.ll_label[i]); +} + +static void +translabel(grammar *g, label *lb) +{ + int i; + + if (Py_DebugFlag) + printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); + + if (lb->lb_type == NAME) { + for (i = 0; i < g->g_ndfas; i++) { + if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { + if (Py_DebugFlag) + printf( + "Label %s is non-terminal %d.\n", + lb->lb_str, + g->g_dfa[i].d_type); + lb->lb_type = g->g_dfa[i].d_type; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + for (i = 0; i < (int)N_TOKENS; i++) { + if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { + if (Py_DebugFlag) + printf("Label %s is terminal %d.\n", + lb->lb_str, i); + lb->lb_type = i; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + printf("Can't translate NAME label '%s'\n", lb->lb_str); + return; + } + + if (lb->lb_type == STRING) { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { + char *p; + char *src; + char *dest; + size_t name_len; + if (Py_DebugFlag) + printf("Label %s is a keyword\n", lb->lb_str); + lb->lb_type = NAME; + src = lb->lb_str + 1; + p = strchr(src, '\''); + if (p) + 
name_len = p - src; + else + name_len = strlen(src); + dest = (char *)malloc(name_len + 1); + if (!dest) { + printf("Can't alloc dest '%s'\n", src); + return; + } + strncpy(dest, src, name_len); + dest[name_len] = '\0'; + free(lb->lb_str); + lb->lb_str = dest; + } + else if (lb->lb_str[2] == lb->lb_str[0]) { + int type = (int) PyToken_OneChar(lb->lb_str[1]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { + int type = (int) PyToken_TwoChars(lb->lb_str[1], + lb->lb_str[2]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { + int type = (int) PyToken_ThreeChars(lb->lb_str[1], + lb->lb_str[2], + lb->lb_str[3]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else + printf("Can't translate STRING label %s\n", + lb->lb_str); + } + else + printf("Can't translate label '%s'\n", + PyGrammar_LabelRepr(lb)); +} diff --git a/sys/src/cmd/python/Parser/grammar.mak b/sys/src/cmd/python/Parser/grammar.mak new file mode 100644 index 000000000..55f028ffb --- /dev/null +++ b/sys/src/cmd/python/Parser/grammar.mak @@ -0,0 +1,45 @@ +# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via +# +# nmake /f grammar.mak +# +# You may also need to copy python23.dll into this directory, or get +# it on your search path. +# +# The intermediate files can be nuked afterwards: +# +# nmake /f grammar.mak clean +# +# I don't understand the maze of preprocessor #define's on Windows, and +# as a result this requires linking with python23.lib, so it's of no use +# for bootstrapping (the cause appears to be a useless-- in this +# particular case --pragma in PC\pyconfig.h, which demands that +# python23.lib get linked in). 
+ +LIBS= ..\PCbuild\python25.lib + +CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD + +GRAMMAR_H= ..\Include\graminit.h +GRAMMAR_C= ..\Python\graminit.c +GRAMMAR_INPUT= ..\Grammar\Grammar + +PGEN= pgen.exe + +POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \ + parsetok.obj tokenizer.obj bitset.obj metagrammar.obj + +PARSER_OBJS= $(POBJS) myreadline.obj + +PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj + +PGENOBJS= $(POBJS) $(PGOBJS) + +$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT) + $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C) + +$(PGEN): $(PGENOBJS) + $(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN) + +clean: + del *.obj + del $(PGEN) diff --git a/sys/src/cmd/python/Parser/grammar1.c b/sys/src/cmd/python/Parser/grammar1.c new file mode 100644 index 000000000..b76719a1d --- /dev/null +++ b/sys/src/cmd/python/Parser/grammar1.c @@ -0,0 +1,57 @@ + +/* Grammar subroutines needed by parser */ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +/* Return the DFA for the given type */ + +dfa * +PyGrammar_FindDFA(grammar *g, register int type) +{ + register dfa *d; +#if 1 + /* Massive speed-up */ + d = &g->g_dfa[type - NT_OFFSET]; + assert(d->d_type == type); + return d; +#else + /* Old, slow version */ + register int i; + + for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { + if (d->d_type == type) + return d; + } + assert(0); + /* NOTREACHED */ +#endif +} + +char * +PyGrammar_LabelRepr(label *lb) +{ + static char buf[100]; + + if (lb->lb_type == ENDMARKER) + return "EMPTY"; + else if (ISNONTERMINAL(lb->lb_type)) { + if (lb->lb_str == NULL) { + PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); + return buf; + } + else + return lb->lb_str; + } + else { + if (lb->lb_str == NULL) + return _PyParser_TokenNames[lb->lb_type]; + else { + PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", + _PyParser_TokenNames[lb->lb_type], lb->lb_str); + return buf; + } + } +} diff --git a/sys/src/cmd/python/Parser/intrcheck.c b/sys/src/cmd/python/Parser/intrcheck.c new file mode 100644 index 000000000..e0f3252db --- /dev/null +++ b/sys/src/cmd/python/Parser/intrcheck.c @@ -0,0 +1,176 @@ + +/* Check for interrupts */ + +#include "Python.h" + +#ifdef QUICKWIN + +#include <io.h> + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + _wyield(); +} + +#define OK + +#endif /* QUICKWIN */ + +#if defined(_M_IX86) && !defined(__QNX__) +#include <io.h> +#endif + +#if defined(MSDOS) && !defined(QUICKWIN) + +#ifdef __GNUC__ + +/* This is for DJGPP's GO32 extender. I don't know how to trap + * control-C (There's no API for ctrl-C, and I don't want to mess with + * the interrupt vectors.) However, this DOES catch control-break. 
+ * --Amrit + */ + +#include <go32.h> + +void +PyOS_InitInterrupts(void) +{ + _go32_want_ctrl_break(1 /* TRUE */); +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + return _go32_was_ctrl_break_hit(); +} + +#else /* !__GNUC__ */ + +/* This might work for MS-DOS (untested though): */ + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + int interrupted = 0; + while (kbhit()) { + if (getch() == '\003') + interrupted = 1; + } + return interrupted; +} + +#endif /* __GNUC__ */ + +#define OK + +#endif /* MSDOS && !QUICKWIN */ + + +#ifndef OK + +/* Default version -- for real operating systems and for Standard C */ + +#include <stdio.h> +#include <string.h> +#include <signal.h> + +static int interrupted; + +void +PyErr_SetInterrupt(void) +{ + interrupted = 1; +} + +extern int PyErr_CheckSignals(void); + +static int +checksignals_witharg(void * arg) +{ + return PyErr_CheckSignals(); +} + +static void +intcatcher(int sig) +{ + extern void Py_Exit(int); + static char message[] = +"python: to interrupt a truly hanging Python program, interrupt once more.\n"; + switch (interrupted++) { + case 0: + break; + case 1: +#ifdef RISCOS + fprintf(stderr, message); +#else + write(2, message, strlen(message)); +#endif + break; + case 2: + interrupted = 0; + Py_Exit(1); + break; + } + PyOS_setsig(SIGINT, intcatcher); + Py_AddPendingCall(checksignals_witharg, NULL); +} + +static void (*old_siginthandler)(int) = SIG_DFL; + +void +PyOS_InitInterrupts(void) +{ + if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) + PyOS_setsig(SIGINT, intcatcher); +} + +void +PyOS_FiniInterrupts(void) +{ + PyOS_setsig(SIGINT, old_siginthandler); +} + +int +PyOS_InterruptOccurred(void) +{ + if (!interrupted) + return 0; + interrupted = 0; + return 1; +} + +#endif /* !OK */ + +void +PyOS_AfterFork(void) +{ +#ifdef WITH_THREAD + PyEval_ReInitThreads(); +#endif +} diff --git a/sys/src/cmd/python/Parser/listnode.c b/sys/src/cmd/python/Parser/listnode.c new file mode 100644 index 000000000..c0b3b666f --- /dev/null +++ b/sys/src/cmd/python/Parser/listnode.c @@ -0,0 +1,66 @@ + +/* List a node on a file */ + +#include "pgenheaders.h" +#include "token.h" +#include "node.h" + +/* Forward */ +static void list1node(FILE *, node *); +static void listnode(FILE *, node *); + +void +PyNode_ListTree(node *n) +{ + listnode(stdout, n); +} + +static int level, atbol; + +static void +listnode(FILE *fp, node *n) +{ + level = 0; + atbol = 1; + list1node(fp, n); +} + +static void +list1node(FILE *fp, node *n) +{ + if (n == 0) + return; + if (ISNONTERMINAL(TYPE(n))) { + int i; + for (i = 0; i < NCH(n); i++) + list1node(fp, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + switch (TYPE(n)) { + case INDENT: + ++level; + break; + case DEDENT: + --level; + break; + default: + if (atbol) { + int i; + for (i = 0; i < level; ++i) + fprintf(fp, "\t"); + atbol = 0; + } + if (TYPE(n) == NEWLINE) { + if (STR(n) != NULL) + fprintf(fp, "%s", STR(n)); + fprintf(fp, "\n"); + atbol = 1; + } + else + fprintf(fp, "%s ", STR(n)); + break; + } + } + else + fprintf(fp, "? 
"); +} diff --git a/sys/src/cmd/python/Parser/metagrammar.c b/sys/src/cmd/python/Parser/metagrammar.c new file mode 100644 index 000000000..b61bc6d66 --- /dev/null +++ b/sys/src/cmd/python/Parser/metagrammar.c @@ -0,0 +1,159 @@ + +#include "pgenheaders.h" +#include "metagrammar.h" +#include "grammar.h" +#include "pgen.h" +static arc arcs_0_0[3] = { + {2, 0}, + {3, 0}, + {4, 1}, +}; +static arc arcs_0_1[1] = { + {0, 1}, +}; +static state states_0[2] = { + {3, arcs_0_0}, + {1, arcs_0_1}, +}; +static arc arcs_1_0[1] = { + {5, 1}, +}; +static arc arcs_1_1[1] = { + {6, 2}, +}; +static arc arcs_1_2[1] = { + {7, 3}, +}; +static arc arcs_1_3[1] = { + {3, 4}, +}; +static arc arcs_1_4[1] = { + {0, 4}, +}; +static state states_1[5] = { + {1, arcs_1_0}, + {1, arcs_1_1}, + {1, arcs_1_2}, + {1, arcs_1_3}, + {1, arcs_1_4}, +}; +static arc arcs_2_0[1] = { + {8, 1}, +}; +static arc arcs_2_1[2] = { + {9, 0}, + {0, 1}, +}; +static state states_2[2] = { + {1, arcs_2_0}, + {2, arcs_2_1}, +}; +static arc arcs_3_0[1] = { + {10, 1}, +}; +static arc arcs_3_1[2] = { + {10, 1}, + {0, 1}, +}; +static state states_3[2] = { + {1, arcs_3_0}, + {2, arcs_3_1}, +}; +static arc arcs_4_0[2] = { + {11, 1}, + {13, 2}, +}; +static arc arcs_4_1[1] = { + {7, 3}, +}; +static arc arcs_4_2[3] = { + {14, 4}, + {15, 4}, + {0, 2}, +}; +static arc arcs_4_3[1] = { + {12, 4}, +}; +static arc arcs_4_4[1] = { + {0, 4}, +}; +static state states_4[5] = { + {2, arcs_4_0}, + {1, arcs_4_1}, + {3, arcs_4_2}, + {1, arcs_4_3}, + {1, arcs_4_4}, +}; +static arc arcs_5_0[3] = { + {5, 1}, + {16, 1}, + {17, 2}, +}; +static arc arcs_5_1[1] = { + {0, 1}, +}; +static arc arcs_5_2[1] = { + {7, 3}, +}; +static arc arcs_5_3[1] = { + {18, 1}, +}; +static state states_5[4] = { + {3, arcs_5_0}, + {1, arcs_5_1}, + {1, arcs_5_2}, + {1, arcs_5_3}, +}; +static dfa dfas[6] = { + {256, "MSTART", 0, 2, states_0, + "\070\000\000"}, + {257, "RULE", 0, 5, states_1, + "\040\000\000"}, + {258, "RHS", 0, 2, states_2, + "\040\010\003"}, + {259, "ALT", 0, 2, states_3, + "\040\010\003"}, + {260, "ITEM", 0, 5, states_4, + "\040\010\003"}, + {261, "ATOM", 0, 4, states_5, + "\040\000\003"}, +}; +static label labels[19] = { + {0, "EMPTY"}, + {256, 0}, + {257, 0}, + {4, 0}, + {0, 0}, + {1, 0}, + {11, 0}, + {258, 0}, + {259, 0}, + {18, 0}, + {260, 0}, + {9, 0}, + {10, 0}, + {261, 0}, + {16, 0}, + {14, 0}, + {3, 0}, + {7, 0}, + {8, 0}, +}; +static grammar _PyParser_Grammar = { + 6, + dfas, + {19, labels}, + 256 +}; + +grammar * +meta_grammar(void) +{ + return &_PyParser_Grammar; +} + +grammar * +Py_meta_grammar(void) +{ + return meta_grammar(); +} diff --git a/sys/src/cmd/python/Parser/mkfile b/sys/src/cmd/python/Parser/mkfile new file mode 100644 index 000000000..01557576e --- /dev/null +++ b/sys/src/cmd/python/Parser/mkfile @@ -0,0 +1,44 @@ +APE=/sys/src/ape +<$APE/config + +LIB=/$objtype/lib/ape/libpython.a + +OFILES=\ + acceler.$O\ + bitset.$O\ + firstsets.$O\ + grammar1.$O\ + listnode.$O\ + metagrammar.$O\ + myreadline.$O\ + node.$O\ + parser.$O\ + parsetok.$O\ + tokenizer.$O\ + +CLEANFILES=$O.pgen + +</sys/src/cmd/mksyslib + +CFLAGS=-c -I.. 
-I../Include -DT$objtype -DPy_BUILD_CORE -DNDEBUG + +PGENOFILES=\ + acceler.$O\ + bitset.$O\ + firstsets.$O\ + grammar.$O\ + grammar1.$O\ + listnode.$O\ + metagrammar.$O\ + node.$O\ + parser.$O\ + parsetok.$O\ + pgen.$O\ + pgenmain.$O\ + printgrammar.$O\ + tokenizer_pgen.$O\ + ../Objects/obmalloc.$O\ + ../Python/mysnprintf.$O\ + +$O.pgen: $PGENOFILES + $LD -o $target $prereq diff --git a/sys/src/cmd/python/Parser/myreadline.c b/sys/src/cmd/python/Parser/myreadline.c new file mode 100644 index 000000000..32a1088ad --- /dev/null +++ b/sys/src/cmd/python/Parser/myreadline.c @@ -0,0 +1,219 @@ + +/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c. + By default, or when stdin is not a tty device, we have a super + simple my_readline function using fgets. + Optionally, we can use the GNU readline library. + my_readline() has a different return value from GNU readline(): + - NULL if an interrupt occurred or if an error occurred + - a malloc'ed empty string if EOF was read + - a malloc'ed string ending in \n normally +*/ + +#include "Python.h" +#ifdef MS_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include "windows.h" +#endif /* MS_WINDOWS */ + +#ifdef __VMS +extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt); +#endif + + +PyThreadState* _PyOS_ReadlineTState; + +#ifdef WITH_THREAD +#include "pythread.h" +static PyThread_type_lock _PyOS_ReadlineLock = NULL; +#endif + +int (*PyOS_InputHook)(void) = NULL; + +#ifdef RISCOS +int Py_RISCOSWimpFlag; +#endif + +/* This function restarts a fgets() after an EINTR error occurred + except if PyOS_InterruptOccurred() returns true. */ + +static int +my_fgets(char *buf, int len, FILE *fp) +{ + char *p; + for (;;) { + if (PyOS_InputHook != NULL) + (void)(PyOS_InputHook)(); + errno = 0; + p = fgets(buf, len, fp); + if (p != NULL) + return 0; /* No error */ +#ifdef MS_WINDOWS + /* In the case of a Ctrl+C or some other external event + interrupting the operation: + Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32 + error code (and feof() returns TRUE). + Win9x: Ctrl+C seems to have no effect on fgets() returning + early - the signal handler is called, but the fgets() + only returns "normally" (ie, when Enter hit or feof()) + */ + if (GetLastError()==ERROR_OPERATION_ABORTED) { + /* Signals come asynchronously, so we sleep a brief + moment before checking if the handler has been + triggered (we cant just return 1 before the + signal handler has been called, as the later + signal may be treated as a separate interrupt). + */ + Sleep(1); + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + /* Either the sleep wasn't long enough (need a + short loop retrying?) or not interrupted at all + (in which case we should revisit the whole thing!) + Logging some warning would be nice. assert is not + viable as under the debugger, the various dialogs + mean the condition is not true. 
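+			   A bounded retry (a hypothetical sketch, not
+			   what the code below does) would give the
+			   handler a little more time:
+
+			       int tries;
+			       for (tries = 0; tries < 10; tries++) {
+			           Sleep(1);
+			           if (PyOS_InterruptOccurred())
+			               return 1;
+			       }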
+ */ + } +#endif /* MS_WINDOWS */ + if (feof(fp)) { + return -1; /* EOF */ + } +#ifdef EINTR + if (errno == EINTR) { + int s; +#ifdef WITH_THREAD + PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif + s = PyErr_CheckSignals(); +#ifdef WITH_THREAD + PyEval_SaveThread(); +#endif + if (s < 0) { + return 1; + } + } +#endif + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + return -2; /* Error */ + } + /* NOTREACHED */ +} + + +/* Readline implementation using fgets() */ + +char * +PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n; + char *p; + n = 100; + if ((p = (char *)PyMem_MALLOC(n)) == NULL) + return NULL; + fflush(sys_stdout); +#ifndef RISCOS + if (prompt) + fprintf(stderr, "%s", prompt); +#else + if (prompt) { + if(Py_RISCOSWimpFlag) + fprintf(stderr, "\x0cr%s\x0c", prompt); + else + fprintf(stderr, "%s", prompt); + } +#endif + fflush(stderr); + switch (my_fgets(p, (int)n, sys_stdin)) { + case 0: /* Normal case */ + break; + case 1: /* Interrupt */ + PyMem_FREE(p); + return NULL; + case -1: /* EOF */ + case -2: /* Error */ + default: /* Shouldn't happen */ + *p = '\0'; + break; + } + n = strlen(p); + while (n > 0 && p[n-1] != '\n') { + size_t incr = n+2; + p = (char *)PyMem_REALLOC(p, n + incr); + if (p == NULL) + return NULL; + if (incr > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "input line too long"); + } + if (my_fgets(p+n, (int)incr, sys_stdin) != 0) + break; + n += strlen(p+n); + } + return (char *)PyMem_REALLOC(p, n+1); +} + + +/* By initializing this function pointer, systems embedding Python can + override the readline function. + + Note: Python expects in return a buffer allocated with PyMem_Malloc. */ + +char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); + + +/* Interface used by tokenizer.c and bltinmodule.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + char *rv; + + if (_PyOS_ReadlineTState == PyThreadState_GET()) { + PyErr_SetString(PyExc_RuntimeError, + "can't re-enter readline"); + return NULL; + } + + + if (PyOS_ReadlineFunctionPointer == NULL) { +#ifdef __VMS + PyOS_ReadlineFunctionPointer = vms__StdioReadline; +#else + PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; +#endif + } + +#ifdef WITH_THREAD + if (_PyOS_ReadlineLock == NULL) { + _PyOS_ReadlineLock = PyThread_allocate_lock(); + } +#endif + + _PyOS_ReadlineTState = PyThreadState_GET(); + Py_BEGIN_ALLOW_THREADS +#ifdef WITH_THREAD + PyThread_acquire_lock(_PyOS_ReadlineLock, 1); +#endif + + /* This is needed to handle the unlikely case that the + * interpreter is in interactive mode *and* stdin/out are not + * a tty. 
This can happen, for example if python is run like + * this: python -i < test1.py + */ + if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) + rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); + else + rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, + prompt); + Py_END_ALLOW_THREADS + +#ifdef WITH_THREAD + PyThread_release_lock(_PyOS_ReadlineLock); +#endif + + _PyOS_ReadlineTState = NULL; + + return rv; +} diff --git a/sys/src/cmd/python/Parser/node.c b/sys/src/cmd/python/Parser/node.c new file mode 100644 index 000000000..d133a0d17 --- /dev/null +++ b/sys/src/cmd/python/Parser/node.c @@ -0,0 +1,135 @@ +/* Parse tree node implementation */ + +#include "Python.h" +#include "node.h" +#include "errcode.h" + +node * +PyNode_New(int type) +{ + node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); + if (n == NULL) + return NULL; + n->n_type = type; + n->n_str = NULL; + n->n_lineno = 0; + n->n_nchildren = 0; + n->n_child = NULL; + return n; +} + +/* See comments at XXXROUNDUP below. Returns -1 on overflow. */ +static int +fancy_roundup(int n) +{ + /* Round up to the closest power of 2 >= n. */ + int result = 256; + assert(n > 128); + while (result < n) { + result <<= 1; + if (result <= 0) + return -1; + } + return result; +} + +/* A gimmick to make massive numbers of reallocs quicker. The result is + * a number >= the input. In PyNode_AddChild, it's used like so, when + * we're about to add child number current_size + 1: + * + * if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1): + * allocate space for XXXROUNDUP(current_size + 1) total children + * else: + * we already have enough space + * + * Since a node starts out empty, we must have + * + * XXXROUNDUP(0) < XXXROUNDUP(1) + * + * so that we allocate space for the first child. One-child nodes are very + * common (presumably that would change if we used a more abstract form + * of syntax tree), so to avoid wasting memory it's desirable that + * XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0. + * + * Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4? + * Rounding up to a multiple of an exact power of 2 is very efficient, and + * most nodes with more than one child have <= 4 kids. + * + * Else we call fancy_roundup() to grow proportionately to n. We've got an + * extreme case then (like test_longexp.py), and on many platforms doing + * anything less than proportional growth leads to exorbitant runtime + * (e.g., MacPython), or extreme fragmentation of user address space (e.g., + * Win98). + * + * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre + * reported that, with this scheme, 89% of PyObject_REALLOC calls in + * PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually + * wastes very little memory, but is very effective at sidestepping + * platform-realloc disasters on vulnerable platforms. + * + * Note that this would be straightforward if a node stored its current + * capacity. The code is tricky to avoid that. + */ +#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ + (n) <= 128 ? 
(((n) + 3) & ~3) : \ + fancy_roundup(n)) + + +int +PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) +{ + const int nch = n1->n_nchildren; + int current_capacity; + int required_capacity; + node *n; + + if (nch == INT_MAX || nch < 0) + return E_OVERFLOW; + + current_capacity = XXXROUNDUP(nch); + required_capacity = XXXROUNDUP(nch + 1); + if (current_capacity < 0 || required_capacity < 0) + return E_OVERFLOW; + if (current_capacity < required_capacity) { + n = n1->n_child; + n = (node *) PyObject_REALLOC(n, + required_capacity * sizeof(node)); + if (n == NULL) + return E_NOMEM; + n1->n_child = n; + } + + n = &n1->n_child[n1->n_nchildren++]; + n->n_type = type; + n->n_str = str; + n->n_lineno = lineno; + n->n_col_offset = col_offset; + n->n_nchildren = 0; + n->n_child = NULL; + return 0; +} + +/* Forward */ +static void freechildren(node *); + + +void +PyNode_Free(node *n) +{ + if (n != NULL) { + freechildren(n); + PyObject_FREE(n); + } +} + +static void +freechildren(node *n) +{ + int i; + for (i = NCH(n); --i >= 0; ) + freechildren(CHILD(n, i)); + if (n->n_child != NULL) + PyObject_FREE(n->n_child); + if (STR(n) != NULL) + PyObject_FREE(STR(n)); +} diff --git a/sys/src/cmd/python/Parser/parser.c b/sys/src/cmd/python/Parser/parser.c new file mode 100644 index 000000000..2ce84cd32 --- /dev/null +++ b/sys/src/cmd/python/Parser/parser.c @@ -0,0 +1,433 @@ + +/* Parser implementation */ + +/* For a description, see the comments at end of this file */ + +/* XXX To do: error recovery */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "grammar.h" +#include "node.h" +#include "parser.h" +#include "errcode.h" + + +#ifdef Py_DEBUG +extern int Py_DebugFlag; +#define D(x) if (!Py_DebugFlag); else x +#else +#define D(x) +#endif + + +/* STACK DATA TYPE */ + +static void s_reset(stack *); + +static void +s_reset(stack *s) +{ + s->s_top = &s->s_base[MAXSTACK]; +} + +#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) + +static int +s_push(register stack *s, dfa *d, node *parent) +{ + register stackentry *top; + if (s->s_top == s->s_base) { + fprintf(stderr, "s_push: parser stack overflow\n"); + return E_NOMEM; + } + top = --s->s_top; + top->s_dfa = d; + top->s_parent = parent; + top->s_state = 0; + return 0; +} + +#ifdef Py_DEBUG + +static void +s_pop(register stack *s) +{ + if (s_empty(s)) + Py_FatalError("s_pop: parser stack underflow -- FATAL"); + s->s_top++; +} + +#else /* !Py_DEBUG */ + +#define s_pop(s) (s)->s_top++ + +#endif + + +/* PARSER CREATION */ + +parser_state * +PyParser_New(grammar *g, int start) +{ + parser_state *ps; + + if (!g->g_accel) + PyGrammar_AddAccelerators(g); + ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); + if (ps == NULL) + return NULL; + ps->p_grammar = g; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + ps->p_flags = 0; +#endif + ps->p_tree = PyNode_New(start); + if (ps->p_tree == NULL) { + PyMem_FREE(ps); + return NULL; + } + s_reset(&ps->p_stack); + (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); + return ps; +} + +void +PyParser_Delete(parser_state *ps) +{ + /* NB If you want to save the parse tree, + you must set p_tree to NULL before calling delparser! 
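+	   For example (an illustrative sketch, not code from this file):
+
+	       node *tree = ps->p_tree;
+	       ps->p_tree = NULL;
+	       PyParser_Delete(ps);
+	       ... use tree ..., then PyNode_Free(tree);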
*/ + PyNode_Free(ps->p_tree); + PyMem_FREE(ps); +} + + +/* PARSER STACK OPERATIONS */ + +static int +shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) +{ + int err; + assert(!s_empty(s)); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return 0; +} + +static int +push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) +{ + int err; + register node *n; + n = s->s_top->s_parent; + assert(!s_empty(s)); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return s_push(s, d, CHILD(n, NCH(n)-1)); +} + + +/* PARSER PROPER */ + +static int +classify(parser_state *ps, int type, char *str) +{ + grammar *g = ps->p_grammar; + register int n = g->g_ll.ll_nlabels; + + if (type == NAME) { + register char *s = str; + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type != NAME || l->lb_str == NULL || + l->lb_str[0] != s[0] || + strcmp(l->lb_str, s) != 0) + continue; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { + if (s[0] == 'w' && strcmp(s, "with") == 0) + break; /* not a keyword yet */ + else if (s[0] == 'a' && strcmp(s, "as") == 0) + break; /* not a keyword yet */ + } +#endif + D(printf("It's a keyword\n")); + return n - i; + } + } + + { + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type == type && l->lb_str == NULL) { + D(printf("It's a token we know\n")); + return n - i; + } + } + } + + D(printf("Illegal token\n")); + return -1; +} + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD +static void +future_hack(parser_state *ps) +{ + node *n = ps->p_stack.s_top->s_parent; + node *ch, *cch; + int i; + + /* from __future__ import ..., must have at least 4 children */ + n = CHILD(n, 0); + if (NCH(n) < 4) + return; + ch = CHILD(n, 0); + if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) + return; + ch = CHILD(n, 1); + if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && + strcmp(STR(CHILD(ch, 0)), "__future__") != 0) + return; + ch = CHILD(n, 3); + /* ch can be a star, a parenthesis or import_as_names */ + if (TYPE(ch) == STAR) + return; + if (TYPE(ch) == LPAR) + ch = CHILD(n, 4); + + for (i = 0; i < NCH(ch); i += 2) { + cch = CHILD(ch, i); + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME && + strcmp(STR(CHILD(cch, 0)), "with_statement") == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + break; + } + } +} +#endif /* future keyword */ + +int +PyParser_AddToken(register parser_state *ps, register int type, char *str, + int lineno, int col_offset, int *expected_ret) +{ + register int ilabel; + int err; + + D(printf("Token %s/'%s' ... 
", _PyParser_TokenNames[type], str)); + + /* Find out which label this token is */ + ilabel = classify(ps, type, str); + if (ilabel < 0) + return E_SYNTAX; + + /* Loop until the token is shifted or an error occurred */ + for (;;) { + /* Fetch the current dfa and state */ + register dfa *d = ps->p_stack.s_top->s_dfa; + register state *s = &d->d_state[ps->p_stack.s_top->s_state]; + + D(printf(" DFA '%s', state %d:", + d->d_name, ps->p_stack.s_top->s_state)); + + /* Check accelerator */ + if (s->s_lower <= ilabel && ilabel < s->s_upper) { + register int x = s->s_accel[ilabel - s->s_lower]; + if (x != -1) { + if (x & (1<<7)) { + /* Push non-terminal */ + int nt = (x >> 8) + NT_OFFSET; + int arrow = x & ((1<<7)-1); + dfa *d1 = PyGrammar_FindDFA( + ps->p_grammar, nt); + if ((err = push(&ps->p_stack, nt, d1, + arrow, lineno, col_offset)) > 0) { + D(printf(" MemError: push\n")); + return err; + } + D(printf(" Push ...\n")); + continue; + } + + /* Shift the token */ + if ((err = shift(&ps->p_stack, type, str, + x, lineno, col_offset)) > 0) { + D(printf(" MemError: shift.\n")); + return err; + } + D(printf(" Shift.\n")); + /* Pop while we are in an accept-only state */ + while (s = &d->d_state + [ps->p_stack.s_top->s_state], + s->s_accept && s->s_narcs == 1) { + D(printf(" DFA '%s', state %d: " + "Direct pop.\n", + d->d_name, + ps->p_stack.s_top->s_state)); +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, + "import_stmt") == 0) + future_hack(ps); +#endif + s_pop(&ps->p_stack); + if (s_empty(&ps->p_stack)) { + D(printf(" ACCEPT.\n")); + return E_DONE; + } + d = ps->p_stack.s_top->s_dfa; + } + return E_OK; + } + } + + if (s->s_accept) { +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, "import_stmt") == 0) + future_hack(ps); +#endif + /* Pop this dfa and try again */ + s_pop(&ps->p_stack); + D(printf(" Pop ...\n")); + if (s_empty(&ps->p_stack)) { + D(printf(" Error: bottom of stack.\n")); + return E_SYNTAX; + } + continue; + } + + /* Stuck, report syntax error */ + D(printf(" Error.\n")); + if (expected_ret) { + if (s->s_lower == s->s_upper - 1) { + /* Only one possible expected token */ + *expected_ret = ps->p_grammar-> + g_ll.ll_label[s->s_lower].lb_type; + } + else + *expected_ret = -1; + } + return E_SYNTAX; + } +} + + +#ifdef Py_DEBUG + +/* DEBUG OUTPUT */ + +void +dumptree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + printf("NIL"); + else { + label l; + l.lb_type = TYPE(n); + l.lb_str = STR(n); + printf("%s", PyGrammar_LabelRepr(&l)); + if (ISNONTERMINAL(TYPE(n))) { + printf("("); + for (i = 0; i < NCH(n); i++) { + if (i > 0) + printf(","); + dumptree(g, CHILD(n, i)); + } + printf(")"); + } + } +} + +void +showtree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + return; + if (ISNONTERMINAL(TYPE(n))) { + for (i = 0; i < NCH(n); i++) + showtree(g, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + printf("%s", _PyParser_TokenNames[TYPE(n)]); + if (TYPE(n) == NUMBER || TYPE(n) == NAME) + printf("(%s)", STR(n)); + printf(" "); + } + else + printf("? 
"); +} + +void +printtree(parser_state *ps) +{ + if (Py_DebugFlag) { + printf("Parse tree:\n"); + dumptree(ps->p_grammar, ps->p_tree); + printf("\n"); + printf("Tokens:\n"); + showtree(ps->p_grammar, ps->p_tree); + printf("\n"); + } + printf("Listing:\n"); + PyNode_ListTree(ps->p_tree); + printf("\n"); +} + +#endif /* Py_DEBUG */ + +/* + +Description +----------- + +The parser's interface is different than usual: the function addtoken() +must be called for each token in the input. This makes it possible to +turn it into an incremental parsing system later. The parsing system +constructs a parse tree as it goes. + +A parsing rule is represented as a Deterministic Finite-state Automaton +(DFA). A node in a DFA represents a state of the parser; an arc represents +a transition. Transitions are either labeled with terminal symbols or +with non-terminals. When the parser decides to follow an arc labeled +with a non-terminal, it is invoked recursively with the DFA representing +the parsing rule for that as its initial state; when that DFA accepts, +the parser that invoked it continues. The parse tree constructed by the +recursively called parser is inserted as a child in the current parse tree. + +The DFA's can be constructed automatically from a more conventional +language description. An extended LL(1) grammar (ELL(1)) is suitable. +Certain restrictions make the parser's life easier: rules that can produce +the empty string should be outlawed (there are other ways to put loops +or optional parts in the language). To avoid the need to construct +FIRST sets, we can require that all but the last alternative of a rule +(really: arc going out of a DFA's state) must begin with a terminal +symbol. + +As an example, consider this grammar: + +expr: term (OP term)* +term: CONSTANT | '(' expr ')' + +The DFA corresponding to the rule for expr is: + +------->.---term-->.-------> + ^ | + | | + \----OP----/ + +The parse tree generated for the input a+b is: + +(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) + +*/ diff --git a/sys/src/cmd/python/Parser/parser.h b/sys/src/cmd/python/Parser/parser.h new file mode 100644 index 000000000..bdca3e944 --- /dev/null +++ b/sys/src/cmd/python/Parser/parser.h @@ -0,0 +1,42 @@ +#ifndef Py_PARSER_H +#define Py_PARSER_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Parser interface */ + +#define MAXSTACK 500 + +typedef struct { + int s_state; /* State in current DFA */ + dfa *s_dfa; /* Current DFA */ + struct _node *s_parent; /* Where to add next node */ +} stackentry; + +typedef struct { + stackentry *s_top; /* Top entry */ + stackentry s_base[MAXSTACK];/* Array of stack entries */ + /* NB The stack grows down */ +} stack; + +typedef struct { + stack p_stack; /* Stack of parser states */ + grammar *p_grammar; /* Grammar to use */ + node *p_tree; /* Top of parse tree */ +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + unsigned long p_flags; /* see co_flags in Include/code.h */ +#endif +} parser_state; + +parser_state *PyParser_New(grammar *g, int start); +void PyParser_Delete(parser_state *ps); +int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, + int *expected_ret); +void PyGrammar_AddAccelerators(grammar *g); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PARSER_H */ diff --git a/sys/src/cmd/python/Parser/parsetok.c b/sys/src/cmd/python/Parser/parsetok.c new file mode 100644 index 000000000..be53e1c59 --- /dev/null +++ b/sys/src/cmd/python/Parser/parsetok.c @@ -0,0 +1,260 @@ + +/* Parser-tokenizer link implementation */ + 
+#include "pgenheaders.h" +#include "tokenizer.h" +#include "node.h" +#include "grammar.h" +#include "parser.h" +#include "parsetok.h" +#include "errcode.h" +#include "graminit.h" + +int Py_TabcheckFlag; + + +/* Forward */ +static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int); +static void initerr(perrdetail *err_ret, const char* filename); + +/* Parse input coming from a string. Return error code, print some errors. */ +node * +PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); +} + +node * +PyParser_ParseStringFlags(const char *s, grammar *g, int start, + perrdetail *err_ret, int flags) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, + g, start, err_ret, flags); +} + +node * +PyParser_ParseStringFlagsFilename(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromString(s)) == NULL) { + err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; + return NULL; + } + + tok->filename = filename ? filename : "<string>"; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (tok->filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + return parsetok(tok, g, start, err_ret, flags); +} + +/* Parse input coming from a file. Return error code, print some errors. */ + +node * +PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret) +{ + return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, + err_ret, 0); +} + +node * +PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { + err_ret->error = E_NOMEM; + return NULL; + } + tok->filename = filename; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + + return parsetok(tok, g, start, err_ret, flags); +} + +/* Parse input coming from the given tokenizer structure. + Return error code. 
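+   On success the parse tree is returned and ownership passes to the
+   caller; on failure NULL is returned and the details are recorded in
+   *err_ret.  The tokenizer is freed in either case.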
*/ + +static char with_msg[] = +"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; + +static char as_msg[] = +"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; + +static void +warn(const char *msg, const char *filename, int lineno) +{ + if (filename == NULL) + filename = "<string>"; + PySys_WriteStderr(msg, filename, lineno); +} + +static node * +parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, + int flags) +{ + parser_state *ps; + node *n; + int started = 0, handling_import = 0, handling_with = 0; + + if ((ps = PyParser_New(g, start)) == NULL) { + fprintf(stderr, "no mem for new parser\n"); + err_ret->error = E_NOMEM; + PyTokenizer_Free(tok); + return NULL; + } +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (flags & PyPARSE_WITH_IS_KEYWORD) + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; +#endif + + for (;;) { + char *a, *b; + int type; + size_t len; + char *str; + int col_offset; + + type = PyTokenizer_Get(tok, &a, &b); + if (type == ERRORTOKEN) { + err_ret->error = tok->done; + break; + } + if (type == ENDMARKER && started) { + type = NEWLINE; /* Add an extra newline */ + handling_with = handling_import = 0; + started = 0; + /* Add the right number of dedent tokens, + except if a certain flag is given -- + codeop.py uses this. */ + if (tok->indent && + !(flags & PyPARSE_DONT_IMPLY_DEDENT)) + { + tok->pendin = -tok->indent; + tok->indent = 0; + } + } + else + started = 1; + len = b - a; /* XXX this may compute NULL - NULL */ + str = (char *) PyObject_MALLOC(len + 1); + if (str == NULL) { + fprintf(stderr, "no mem for next token\n"); + err_ret->error = E_NOMEM; + break; + } + if (len > 0) + strncpy(str, a, len); + str[len] = '\0'; + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + /* This is only necessary to support the "as" warning, but + we don't want to warn about "as" in import statements. 
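+		   For example, "import foo as bar" must stay quiet,
+		   while a plain NAME spelled "as" anywhere else gets
+		   the warning below.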
*/ + if (type == NAME && + len == 6 && str[0] == 'i' && strcmp(str, "import") == 0) + handling_import = 1; + + /* Warn about with as NAME */ + if (type == NAME && + !(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { + if (len == 4 && str[0] == 'w' && strcmp(str, "with") == 0) + warn(with_msg, err_ret->filename, tok->lineno); + else if (!(handling_import || handling_with) && + len == 2 && str[0] == 'a' && + strcmp(str, "as") == 0) + warn(as_msg, err_ret->filename, tok->lineno); + } + else if (type == NAME && + (ps->p_flags & CO_FUTURE_WITH_STATEMENT) && + len == 4 && str[0] == 'w' && strcmp(str, "with") == 0) + handling_with = 1; +#endif + if (a >= tok->line_start) + col_offset = a - tok->line_start; + else + col_offset = -1; + + if ((err_ret->error = + PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, + &(err_ret->expected))) != E_OK) { + if (err_ret->error != E_DONE) { + PyObject_FREE(str); + err_ret->token = type; + } + break; + } + } + + if (err_ret->error == E_DONE) { + n = ps->p_tree; + ps->p_tree = NULL; + } + else + n = NULL; + + PyParser_Delete(ps); + + if (n == NULL) { + if (tok->lineno <= 1 && tok->done == E_EOF) + err_ret->error = E_EOF; + err_ret->lineno = tok->lineno; + if (tok->buf != NULL) { + size_t len; + assert(tok->cur - tok->buf < INT_MAX); + err_ret->offset = (int)(tok->cur - tok->buf); + len = tok->inp - tok->buf; + err_ret->text = (char *) PyObject_MALLOC(len + 1); + if (err_ret->text != NULL) { + if (len > 0) + strncpy(err_ret->text, tok->buf, len); + err_ret->text[len] = '\0'; + } + } + } else if (tok->encoding != NULL) { + node* r = PyNode_New(encoding_decl); + if (!r) { + err_ret->error = E_NOMEM; + n = NULL; + goto done; + } + r->n_str = tok->encoding; + r->n_nchildren = 1; + r->n_child = n; + tok->encoding = NULL; + n = r; + } + +done: + PyTokenizer_Free(tok); + + return n; +} + +static void +initerr(perrdetail *err_ret, const char *filename) +{ + err_ret->error = E_OK; + err_ret->filename = filename; + err_ret->lineno = 0; + err_ret->offset = 0; + err_ret->text = NULL; + err_ret->token = -1; + err_ret->expected = -1; +} diff --git a/sys/src/cmd/python/Parser/pgen.c b/sys/src/cmd/python/Parser/pgen.c new file mode 100644 index 000000000..dfe7cacba --- /dev/null +++ b/sys/src/cmd/python/Parser/pgen.c @@ -0,0 +1,706 @@ +/* Parser generator */ + +/* For a description, see the comments at end of this file */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "node.h" +#include "grammar.h" +#include "metagrammar.h" +#include "pgen.h" + +extern int Py_DebugFlag; +extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ + + +/* PART ONE -- CONSTRUCT NFA -- Cf. 
Algorithm 3.2 from [Aho&Ullman 77] */ + +typedef struct _nfaarc { + int ar_label; + int ar_arrow; +} nfaarc; + +typedef struct _nfastate { + int st_narcs; + nfaarc *st_arc; +} nfastate; + +typedef struct _nfa { + int nf_type; + char *nf_name; + int nf_nstates; + nfastate *nf_state; + int nf_start, nf_finish; +} nfa; + +/* Forward */ +static void compile_rhs(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_alt(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_item(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_atom(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); + +static int +addnfastate(nfa *nf) +{ + nfastate *st; + + nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, + sizeof(nfastate) * (nf->nf_nstates + 1)); + if (nf->nf_state == NULL) + Py_FatalError("out of mem"); + st = &nf->nf_state[nf->nf_nstates++]; + st->st_narcs = 0; + st->st_arc = NULL; + return st - nf->nf_state; +} + +static void +addnfaarc(nfa *nf, int from, int to, int lbl) +{ + nfastate *st; + nfaarc *ar; + + st = &nf->nf_state[from]; + st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, + sizeof(nfaarc) * (st->st_narcs + 1)); + if (st->st_arc == NULL) + Py_FatalError("out of mem"); + ar = &st->st_arc[st->st_narcs++]; + ar->ar_label = lbl; + ar->ar_arrow = to; +} + +static nfa * +newnfa(char *name) +{ + nfa *nf; + static int type = NT_OFFSET; /* All types will be disjunct */ + + nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); + if (nf == NULL) + Py_FatalError("no mem for new nfa"); + nf->nf_type = type++; + nf->nf_name = name; /* XXX strdup(name) ??? */ + nf->nf_nstates = 0; + nf->nf_state = NULL; + nf->nf_start = nf->nf_finish = -1; + return nf; +} + +typedef struct _nfagrammar { + int gr_nnfas; + nfa **gr_nfa; + labellist gr_ll; +} nfagrammar; + +/* Forward */ +static void compile_rule(nfagrammar *gr, node *n); + +static nfagrammar * +newnfagrammar(void) +{ + nfagrammar *gr; + + gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); + if (gr == NULL) + Py_FatalError("no mem for new nfa grammar"); + gr->gr_nnfas = 0; + gr->gr_nfa = NULL; + gr->gr_ll.ll_nlabels = 0; + gr->gr_ll.ll_label = NULL; + addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); + return gr; +} + +static nfa * +addnfa(nfagrammar *gr, char *name) +{ + nfa *nf; + + nf = newnfa(name); + gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, + sizeof(nfa) * (gr->gr_nnfas + 1)); + if (gr->gr_nfa == NULL) + Py_FatalError("out of mem"); + gr->gr_nfa[gr->gr_nnfas++] = nf; + addlabel(&gr->gr_ll, NAME, nf->nf_name); + return nf; +} + +#ifdef Py_DEBUG + +static char REQNFMT[] = "metacompile: less than %d children\n"; + +#define REQN(i, count) \ + if (i < count) { \ + fprintf(stderr, REQNFMT, count); \ + Py_FatalError("REQN"); \ + } else + +#else +#define REQN(i, count) /* empty */ +#endif + +static nfagrammar * +metacompile(node *n) +{ + nfagrammar *gr; + int i; + + if (Py_DebugFlag) + printf("Compiling (meta-) parse tree into NFA grammar\n"); + gr = newnfagrammar(); + REQ(n, MSTART); + i = n->n_nchildren - 1; /* Last child is ENDMARKER */ + n = n->n_child; + for (; --i >= 0; n++) { + if (n->n_type != NEWLINE) + compile_rule(gr, n); + } + return gr; +} + +static void +compile_rule(nfagrammar *gr, node *n) +{ + nfa *nf; + + REQ(n, RULE); + REQN(n->n_nchildren, 4); + n = n->n_child; + REQ(n, NAME); + nf = addnfa(gr, n->n_str); + n++; + REQ(n, COLON); + n++; + REQ(n, RHS); + compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); + n++; + REQ(n, NEWLINE); +} + +static void 
+compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, RHS); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ALT); + compile_alt(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + a = *pa; + b = *pb; + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + for (; --i >= 0; n++) { + REQ(n, VBAR); + REQN(i, 1); + --i; + n++; + REQ(n, ALT); + compile_alt(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + } +} + +static void +compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ALT); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ITEM); + compile_item(ll, nf, n, pa, pb); + --i; + n++; + for (; --i >= 0; n++) { + REQ(n, ITEM); + compile_item(ll, nf, n, &a, &b); + addnfaarc(nf, *pb, a, EMPTY); + *pb = b; + } +} + +static void +compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ITEM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LSQB) { + REQN(i, 3); + n++; + REQ(n, RHS); + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, EMPTY); + compile_rhs(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + REQN(i, 1); + n++; + REQ(n, RSQB); + } + else { + compile_atom(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + addnfaarc(nf, *pb, *pa, EMPTY); + if (n->n_type == STAR) + *pb = *pa; + else + REQ(n, PLUS); + } +} + +static void +compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + + REQ(n, ATOM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LPAR) { + REQN(i, 3); + n++; + REQ(n, RHS); + compile_rhs(ll, nf, n, pa, pb); + n++; + REQ(n, RPAR); + } + else if (n->n_type == NAME || n->n_type == STRING) { + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); + } + else + REQ(n, NAME); +} + +static void +dumpstate(labellist *ll, nfa *nf, int istate) +{ + nfastate *st; + int i; + nfaarc *ar; + + printf("%c%2d%c", + istate == nf->nf_start ? '*' : ' ', + istate, + istate == nf->nf_finish ? '.' 
: ' '); + st = &nf->nf_state[istate]; + ar = st->st_arc; + for (i = 0; i < st->st_narcs; i++) { + if (i > 0) + printf("\n "); + printf("-> %2d %s", ar->ar_arrow, + PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); + ar++; + } + printf("\n"); +} + +static void +dumpnfa(labellist *ll, nfa *nf) +{ + int i; + + printf("NFA '%s' has %d states; start %d, finish %d\n", + nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); + for (i = 0; i < nf->nf_nstates; i++) + dumpstate(ll, nf, i); +} + + +/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ + +static void +addclosure(bitset ss, nfa *nf, int istate) +{ + if (addbit(ss, istate)) { + nfastate *st = &nf->nf_state[istate]; + nfaarc *ar = st->st_arc; + int i; + + for (i = st->st_narcs; --i >= 0; ) { + if (ar->ar_label == EMPTY) + addclosure(ss, nf, ar->ar_arrow); + ar++; + } + } +} + +typedef struct _ss_arc { + bitset sa_bitset; + int sa_arrow; + int sa_label; +} ss_arc; + +typedef struct _ss_state { + bitset ss_ss; + int ss_narcs; + struct _ss_arc *ss_arc; + int ss_deleted; + int ss_finish; + int ss_rename; +} ss_state; + +typedef struct _ss_dfa { + int sd_nstates; + ss_state *sd_state; +} ss_dfa; + +/* Forward */ +static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, + labellist *ll, char *msg); +static void simplify(int xx_nstates, ss_state *xx_state); +static void convert(dfa *d, int xx_nstates, ss_state *xx_state); + +static void +makedfa(nfagrammar *gr, nfa *nf, dfa *d) +{ + int nbits = nf->nf_nstates; + bitset ss; + int xx_nstates; + ss_state *xx_state, *yy; + ss_arc *zz; + int istate, jstate, iarc, jarc, ibit; + nfastate *st; + nfaarc *ar; + + ss = newbitset(nbits); + addclosure(ss, nf, nf->nf_start); + xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); + if (xx_state == NULL) + Py_FatalError("no mem for xx_state in makedfa"); + xx_nstates = 1; + yy = &xx_state[0]; + yy->ss_ss = ss; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(ss, nf->nf_finish); + if (yy->ss_finish) + printf("Error: nonterminal '%s' may produce empty.\n", + nf->nf_name); + + /* This algorithm is from a book written before + the invention of structured programming... */ + + /* For each unmarked state... */ + for (istate = 0; istate < xx_nstates; ++istate) { + size_t size; + yy = &xx_state[istate]; + ss = yy->ss_ss; + /* For all its states... */ + for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { + if (!testbit(ss, ibit)) + continue; + st = &nf->nf_state[ibit]; + /* For all non-empty arcs from this state... 
*/
+			for (iarc = 0; iarc < st->st_narcs; iarc++) {
+				ar = &st->st_arc[iarc];
+				if (ar->ar_label == EMPTY)
+					continue;
+				/* Look up in list of arcs from this state */
+				for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
+					zz = &yy->ss_arc[jarc];
+					if (ar->ar_label == zz->sa_label)
+						goto found;
+				}
+				/* Add new arc for this state */
+				size = sizeof(ss_arc) * (yy->ss_narcs + 1);
+				yy->ss_arc = (ss_arc *)PyObject_REALLOC(
+						yy->ss_arc, size);
+				if (yy->ss_arc == NULL)
+					Py_FatalError("out of mem");
+				zz = &yy->ss_arc[yy->ss_narcs++];
+				zz->sa_label = ar->ar_label;
+				zz->sa_bitset = newbitset(nbits);
+				zz->sa_arrow = -1;
+		 found:	;
+				/* Add destination */
+				addclosure(zz->sa_bitset, nf, ar->ar_arrow);
+			}
+		}
+		/* Now look up all the arrow states */
+		for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
+			zz = &xx_state[istate].ss_arc[jarc];
+			for (jstate = 0; jstate < xx_nstates; jstate++) {
+				if (samebitset(zz->sa_bitset,
+					xx_state[jstate].ss_ss, nbits)) {
+					zz->sa_arrow = jstate;
+					goto done;
+				}
+			}
+			size = sizeof(ss_state) * (xx_nstates + 1);
+			xx_state = (ss_state *)PyObject_REALLOC(xx_state,
+							size);
+			if (xx_state == NULL)
+				Py_FatalError("out of mem");
+			zz->sa_arrow = xx_nstates;
+			yy = &xx_state[xx_nstates++];
+			yy->ss_ss = zz->sa_bitset;
+			yy->ss_narcs = 0;
+			yy->ss_arc = NULL;
+			yy->ss_deleted = 0;
+			yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
+		 done:	;
+		}
+	}
+
+	if (Py_DebugFlag)
+		printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+						"before minimizing");
+
+	simplify(xx_nstates, xx_state);
+
+	if (Py_DebugFlag)
+		printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+						"after minimizing");
+
+	convert(d, xx_nstates, xx_state);
+
+	/* XXX cleanup */
+}
+
+static void
+printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
+	   labellist *ll, char *msg)
+{
+	int i, ibit, iarc;
+	ss_state *yy;
+	ss_arc *zz;
+
+	printf("Subset DFA %s\n", msg);
+	for (i = 0; i < xx_nstates; i++) {
+		yy = &xx_state[i];
+		if (yy->ss_deleted)
+			continue;
+		printf(" Subset %d", i);
+		if (yy->ss_finish)
+			printf(" (finish)");
+		printf(" { ");
+		for (ibit = 0; ibit < nbits; ibit++) {
+			if (testbit(yy->ss_ss, ibit))
+				printf("%d ", ibit);
+		}
+		printf("}\n");
+		for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
+			zz = &yy->ss_arc[iarc];
+			printf("  Arc to state %d, label %s\n",
+				zz->sa_arrow,
+				PyGrammar_LabelRepr(
+					&ll->ll_label[zz->sa_label]));
+		}
+	}
+}
+
+
+/* PART THREE -- SIMPLIFY DFA */
+
+/* Simplify the DFA by repeatedly eliminating states that are
+   equivalent to another one.  This is NOT Algorithm 3.3 from
+   [Aho&Ullman 77].  It does not always find the minimal DFA,
+   but it does usually make a much smaller one...  (For an example
+   of sub-optimal behavior, try S: x a b+ | y a b+.)
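+   Roughly: the two b+ loops become distinct subset states whose
+   self-arcs carry different state numbers, so the pairwise
+   samestate() test never finds them equal, even though they accept
+   the same language.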
+*/ + +static int +samestate(ss_state *s1, ss_state *s2) +{ + int i; + + if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) + return 0; + for (i = 0; i < s1->ss_narcs; i++) { + if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || + s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) + return 0; + } + return 1; +} + +static void +renamestates(int xx_nstates, ss_state *xx_state, int from, int to) +{ + int i, j; + + if (Py_DebugFlag) + printf("Rename state %d to %d.\n", from, to); + for (i = 0; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < xx_state[i].ss_narcs; j++) { + if (xx_state[i].ss_arc[j].sa_arrow == from) + xx_state[i].ss_arc[j].sa_arrow = to; + } + } +} + +static void +simplify(int xx_nstates, ss_state *xx_state) +{ + int changes; + int i, j; + + do { + changes = 0; + for (i = 1; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < i; j++) { + if (xx_state[j].ss_deleted) + continue; + if (samestate(&xx_state[i], &xx_state[j])) { + xx_state[i].ss_deleted++; + renamestates(xx_nstates, xx_state, + i, j); + changes++; + break; + } + } + } + } while (changes); +} + + +/* PART FOUR -- GENERATE PARSING TABLES */ + +/* Convert the DFA into a grammar that can be used by our parser */ + +static void +convert(dfa *d, int xx_nstates, ss_state *xx_state) +{ + int i, j; + ss_state *yy; + ss_arc *zz; + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + yy->ss_rename = addstate(d); + } + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + for (j = 0; j < yy->ss_narcs; j++) { + zz = &yy->ss_arc[j]; + addarc(d, yy->ss_rename, + xx_state[zz->sa_arrow].ss_rename, + zz->sa_label); + } + if (yy->ss_finish) + addarc(d, yy->ss_rename, yy->ss_rename, 0); + } + + d->d_initial = 0; +} + + +/* PART FIVE -- GLUE IT ALL TOGETHER */ + +static grammar * +maketables(nfagrammar *gr) +{ + int i; + nfa *nf; + dfa *d; + grammar *g; + + if (gr->gr_nnfas == 0) + return NULL; + g = newgrammar(gr->gr_nfa[0]->nf_type); + /* XXX first rule must be start rule */ + g->g_ll = gr->gr_ll; + + for (i = 0; i < gr->gr_nnfas; i++) { + nf = gr->gr_nfa[i]; + if (Py_DebugFlag) { + printf("Dump of NFA for '%s' ...\n", nf->nf_name); + dumpnfa(&gr->gr_ll, nf); + printf("Making DFA for '%s' ...\n", nf->nf_name); + } + d = adddfa(g, nf->nf_type, nf->nf_name); + makedfa(gr, gr->gr_nfa[i], d); + } + + return g; +} + +grammar * +pgen(node *n) +{ + nfagrammar *gr; + grammar *g; + + gr = metacompile(n); + g = maketables(gr); + translatelabels(g); + addfirstsets(g); + return g; +} + +grammar * +Py_pgen(node *n) +{ + return pgen(n); +} + +/* + +Description +----------- + +Input is a grammar in extended BNF (using * for repetition, + for +at-least-once repetition, [] for optional parts, | for alternatives and +() for grouping). This has already been parsed and turned into a parse +tree. + +Each rule is considered as a regular expression in its own right. +It is turned into a Non-deterministic Finite Automaton (NFA), which +is then turned into a Deterministic Finite Automaton (DFA), which is then +optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, +or similar compiler books (this technique is more often used for lexical +analyzers). + +The DFA's are used by the parser as parsing tables in a special way +that's probably unique. Before they are usable, the FIRST sets of all +non-terminals are computed. 
+ +Reference +--------- + +[Aho&Ullman 77] + Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 + (first edition) + +*/ diff --git a/sys/src/cmd/python/Parser/pgenmain.c b/sys/src/cmd/python/Parser/pgenmain.c new file mode 100644 index 000000000..fc27a2cd1 --- /dev/null +++ b/sys/src/cmd/python/Parser/pgenmain.c @@ -0,0 +1,173 @@ + +/* Parser generator main program */ + +/* This expects a filename containing the grammar as argv[1] (UNIX) + or asks the console for such a file name (THINK C). + It writes its output on two files in the current directory: + - "graminit.c" gets the grammar as a bunch of initialized data + - "graminit.h" gets the grammar's non-terminals as #defines. + Error messages and status info during the generation process are + written to stdout, or sometimes to stderr. */ + +/* XXX TO DO: + - check for duplicate definitions of names (instead of fatal err) +*/ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "parsetok.h" +#include "pgen.h" + +int Py_DebugFlag; +int Py_VerboseFlag; +int Py_IgnoreEnvironmentFlag; + +/* Forward */ +grammar *getgrammar(char *filename); + +void +Py_Exit(int sts) +{ + exit(sts); +} + +int +main(int argc, char **argv) +{ + grammar *g; + FILE *fp; + char *filename, *graminit_h, *graminit_c; + + if (argc != 4) { + fprintf(stderr, + "usage: %s grammar graminit.h graminit.c\n", argv[0]); + Py_Exit(2); + } + filename = argv[1]; + graminit_h = argv[2]; + graminit_c = argv[3]; + g = getgrammar(filename); + fp = fopen(graminit_c, "w"); + if (fp == NULL) { + perror(graminit_c); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_c); + printgrammar(g, fp); + fclose(fp); + fp = fopen(graminit_h, "w"); + if (fp == NULL) { + perror(graminit_h); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_h); + printnonterminals(g, fp); + fclose(fp); + Py_Exit(0); + return 0; /* Make gcc -Wall happy */ +} + +grammar * +getgrammar(char *filename) +{ + FILE *fp; + node *n; + grammar *g0, *g; + perrdetail err; + + fp = fopen(filename, "r"); + if (fp == NULL) { + perror(filename); + Py_Exit(1); + } + g0 = meta_grammar(); + n = PyParser_ParseFile(fp, filename, g0, g0->g_start, + (char *)NULL, (char *)NULL, &err); + fclose(fp); + if (n == NULL) { + fprintf(stderr, "Parsing error %d, line %d.\n", + err.error, err.lineno); + if (err.text != NULL) { + size_t i; + fprintf(stderr, "%s", err.text); + i = strlen(err.text); + if (i == 0 || err.text[i-1] != '\n') + fprintf(stderr, "\n"); + for (i = 0; i < err.offset; i++) { + if (err.text[i] == '\t') + putc('\t', stderr); + else + putc(' ', stderr); + } + fprintf(stderr, "^\n"); + PyObject_FREE(err.text); + } + Py_Exit(1); + } + g = pgen(n); + if (g == NULL) { + printf("Bad grammar.\n"); + Py_Exit(1); + } + return g; +} + +/* Can't happen in pgen */ +PyObject* +PyErr_Occurred() +{ + return 0; +} + +void +Py_FatalError(const char *msg) +{ + fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); + Py_Exit(1); +} + +/* No-nonsense my_readline() for tokenizer.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n = 1000; + char *p = (char *)PyMem_MALLOC(n); + char *q; + if (p == NULL) + return NULL; + fprintf(stderr, "%s", prompt); + q = fgets(p, n, sys_stdin); + if (q == NULL) { + *p = '\0'; + return p; + } + n = strlen(p); + if (n > 0 && p[n-1] != '\n') + p[n-1] = '\n'; + return (char *)PyMem_REALLOC(p, n+1); +} + +/* No-nonsense fgets */ +char * +Py_UniversalNewlineFgets(char *buf, int n, FILE 
*stream, PyObject *fobj) +{ + return fgets(buf, n, stream); +} + + +#include <stdarg.h> + +void +PySys_WriteStderr(const char *format, ...) +{ + va_list va; + + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); +} diff --git a/sys/src/cmd/python/Parser/printgrammar.c b/sys/src/cmd/python/Parser/printgrammar.c new file mode 100644 index 000000000..554069814 --- /dev/null +++ b/sys/src/cmd/python/Parser/printgrammar.c @@ -0,0 +1,113 @@ + +/* Print a bunch of C initializers that represent a grammar */ + +#include "pgenheaders.h" +#include "grammar.h" + +/* Forward */ +static void printarcs(int, dfa *, FILE *); +static void printstates(grammar *, FILE *); +static void printdfas(grammar *, FILE *); +static void printlabels(grammar *, FILE *); + +void +printgrammar(grammar *g, FILE *fp) +{ + fprintf(fp, "#include \"pgenheaders.h\"\n"); + fprintf(fp, "#include \"grammar.h\"\n"); + printdfas(g, fp); + printlabels(g, fp); + fprintf(fp, "grammar _PyParser_Grammar = {\n"); + fprintf(fp, "\t%d,\n", g->g_ndfas); + fprintf(fp, "\tdfas,\n"); + fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels); + fprintf(fp, "\t%d\n", g->g_start); + fprintf(fp, "};\n"); +} + +void +printnonterminals(grammar *g, FILE *fp) +{ + dfa *d; + int i; + + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); +} + +static void +printarcs(int i, dfa *d, FILE *fp) +{ + arc *a; + state *s; + int j, k; + + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", + i, j, s->s_narcs); + a = s->s_arc; + for (k = 0; k < s->s_narcs; k++, a++) + fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow); + fprintf(fp, "};\n"); + } +} + +static void +printstates(grammar *g, FILE *fp) +{ + state *s; + dfa *d; + int i, j; + + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + printarcs(i, d, fp); + fprintf(fp, "static state states_%d[%d] = {\n", + i, d->d_nstates); + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fprintf(fp, "\t{%d, arcs_%d_%d},\n", + s->s_narcs, i, j); + fprintf(fp, "};\n"); + } +} + +static void +printdfas(grammar *g, FILE *fp) +{ + dfa *d; + int i, j; + + printstates(g, fp); + fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n", + d->d_type, d->d_name, d->d_initial, d->d_nstates, i); + fprintf(fp, "\t \""); + for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) + fprintf(fp, "\\%03o", d->d_first[j] & 0xff); + fprintf(fp, "\"},\n"); + } + fprintf(fp, "};\n"); +} + +static void +printlabels(grammar *g, FILE *fp) +{ + label *l; + int i; + + fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); + l = g->g_ll.ll_label; + for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { + if (l->lb_str == NULL) + fprintf(fp, "\t{%d, 0},\n", l->lb_type); + else + fprintf(fp, "\t{%d, \"%s\"},\n", + l->lb_type, l->lb_str); + } + fprintf(fp, "};\n"); +} diff --git a/sys/src/cmd/python/Parser/spark.py b/sys/src/cmd/python/Parser/spark.py new file mode 100644 index 000000000..2c18623a0 --- /dev/null +++ b/sys/src/cmd/python/Parser/spark.py @@ -0,0 +1,840 @@ +# Copyright (c) 1998-2002 John Aycock +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, 
and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +__version__ = 'SPARK-0.7 (pre-alpha-5)' + +import re +import sys +import string + +def _namelist(instance): + namelist, namedict, classlist = [], {}, [instance.__class__] + for c in classlist: + for b in c.__bases__: + classlist.append(b) + for name in c.__dict__.keys(): + if not namedict.has_key(name): + namelist.append(name) + namedict[name] = 1 + return namelist + +class GenericScanner: + def __init__(self, flags=0): + pattern = self.reflect() + self.re = re.compile(pattern, re.VERBOSE|flags) + + self.index2func = {} + for name, number in self.re.groupindex.items(): + self.index2func[number-1] = getattr(self, 't_' + name) + + def makeRE(self, name): + doc = getattr(self, name).__doc__ + rv = '(?P<%s>%s)' % (name[2:], doc) + return rv + + def reflect(self): + rv = [] + for name in _namelist(self): + if name[:2] == 't_' and name != 't_default': + rv.append(self.makeRE(name)) + + rv.append(self.makeRE('t_default')) + return string.join(rv, '|') + + def error(self, s, pos): + print "Lexical error at position %s" % pos + raise SystemExit + + def tokenize(self, s): + pos = 0 + n = len(s) + while pos < n: + m = self.re.match(s, pos) + if m is None: + self.error(s, pos) + + groups = m.groups() + for i in range(len(groups)): + if groups[i] and self.index2func.has_key(i): + self.index2func[i](groups[i]) + pos = m.end() + + def t_default(self, s): + r'( . | \n )+' + print "Specification error: unmatched input" + raise SystemExit + +# +# Extracted from GenericParser and made global so that [un]picking works. +# +class _State: + def __init__(self, stateno, items): + self.T, self.complete, self.items = [], [], items + self.stateno = stateno + +class GenericParser: + # + # An Earley parser, as per J. Earley, "An Efficient Context-Free + # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, + # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, + # Carnegie-Mellon University, August 1968. New formulation of + # the parser according to J. Aycock, "Practical Earley Parsing + # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, + # 2001, and J. Aycock and R. N. Horspool, "Practical Earley + # Parsing", unpublished paper, 2001. + # + + def __init__(self, start): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + self.augment(start) + self.ruleschanged = 1 + + _NULLABLE = '\e_' + _START = 'START' + _BOF = '|-' + + # + # When pickling, take the time to generate the full state machine; + # some information is then extraneous, too. Unfortunately we + # can't save the rule2func map. 
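+    # (rule2func maps grammar rules to bound methods and lambdas,
+    # which don't pickle; __setstate__ below therefore calls
+    # collectRules() on the fresh instance and grafts the rebuilt
+    # map back in.)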
+ # + def __getstate__(self): + if self.ruleschanged: + # + # XXX - duplicated from parse() + # + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + # + # XXX - should find a better way to do this.. + # + changes = 1 + while changes: + changes = 0 + for k, v in self.edges.items(): + if v is None: + state, sym = k + if self.states.has_key(state): + self.goto(state, sym) + changes = 1 + rv = self.__dict__.copy() + for s in self.states.values(): + del s.items + del rv['rule2func'] + del rv['nullable'] + del rv['cores'] + return rv + + def __setstate__(self, D): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + start = D['rules'][self._START][0][1][1] # Blech. + self.augment(start) + D['rule2func'] = self.rule2func + D['makeSet'] = self.makeSet_fast + self.__dict__ = D + + # + # A hook for GenericASTBuilder and GenericASTMatcher. Mess + # thee not with this; nor shall thee toucheth the _preprocess + # argument to addRule. + # + def preprocess(self, rule, func): return rule, func + + def addRule(self, doc, func, _preprocess=1): + fn = func + rules = string.split(doc) + + index = [] + for i in range(len(rules)): + if rules[i] == '::=': + index.append(i-1) + index.append(len(rules)) + + for i in range(len(index)-1): + lhs = rules[index[i]] + rhs = rules[index[i]+2:index[i+1]] + rule = (lhs, tuple(rhs)) + + if _preprocess: + rule, fn = self.preprocess(rule, func) + + if self.rules.has_key(lhs): + self.rules[lhs].append(rule) + else: + self.rules[lhs] = [ rule ] + self.rule2func[rule] = fn + self.rule2name[rule] = func.__name__[2:] + self.ruleschanged = 1 + + def collectRules(self): + for name in _namelist(self): + if name[:2] == 'p_': + func = getattr(self, name) + doc = func.__doc__ + self.addRule(doc, func) + + def augment(self, start): + rule = '%s ::= %s %s' % (self._START, self._BOF, start) + self.addRule(rule, lambda args: args[1], 0) + + def computeNull(self): + self.nullable = {} + tbd = [] + + for rulelist in self.rules.values(): + lhs = rulelist[0][0] + self.nullable[lhs] = 0 + for rule in rulelist: + rhs = rule[1] + if len(rhs) == 0: + self.nullable[lhs] = 1 + continue + # + # We only need to consider rules which + # consist entirely of nonterminal symbols. + # This should be a savings on typical + # grammars. + # + for sym in rhs: + if not self.rules.has_key(sym): + break + else: + tbd.append(rule) + changes = 1 + while changes: + changes = 0 + for lhs, rhs in tbd: + if self.nullable[lhs]: + continue + for sym in rhs: + if not self.nullable[sym]: + break + else: + self.nullable[lhs] = 1 + changes = 1 + + def makeState0(self): + s0 = _State(0, []) + for rule in self.newrules[self._START]: + s0.items.append((rule, 0)) + return s0 + + def finalState(self, tokens): + # + # Yuck. 
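+        # (makeNewRules() adds a second, nullable variant of the START
+        # rule when the grammar's start symbol can derive the empty
+        # string; two rules plus an empty token stream therefore seems
+        # to mean "accept at state 1 immediately".)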
+ # + if len(self.newrules[self._START]) == 2 and len(tokens) == 0: + return 1 + start = self.rules[self._START][0][1][1] + return self.goto(1, start) + + def makeNewRules(self): + worklist = [] + for rulelist in self.rules.values(): + for rule in rulelist: + worklist.append((rule, 0, 1, rule)) + + for rule, i, candidate, oldrule in worklist: + lhs, rhs = rule + n = len(rhs) + while i < n: + sym = rhs[i] + if not self.rules.has_key(sym) or \ + not self.nullable[sym]: + candidate = 0 + i = i + 1 + continue + + newrhs = list(rhs) + newrhs[i] = self._NULLABLE+sym + newrule = (lhs, tuple(newrhs)) + worklist.append((newrule, i+1, + candidate, oldrule)) + candidate = 0 + i = i + 1 + else: + if candidate: + lhs = self._NULLABLE+lhs + rule = (lhs, rhs) + if self.newrules.has_key(lhs): + self.newrules[lhs].append(rule) + else: + self.newrules[lhs] = [ rule ] + self.new2old[rule] = oldrule + + def typestring(self, token): + return None + + def error(self, token): + print "Syntax error at or near `%s' token" % token + raise SystemExit + + def parse(self, tokens): + sets = [ [(1,0), (2,0)] ] + self.links = {} + + if self.ruleschanged: + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + + for i in xrange(len(tokens)): + sets.append([]) + + if sets[i] == []: + break + self.makeSet(tokens[i], sets, i) + else: + sets.append([]) + self.makeSet(None, sets, len(tokens)) + + #_dump(tokens, sets, self.states) + + finalitem = (self.finalState(tokens), 0) + if finalitem not in sets[-2]: + if len(tokens) > 0: + self.error(tokens[i-1]) + else: + self.error(None) + + return self.buildTree(self._START, finalitem, + tokens, len(sets)-2) + + def isnullable(self, sym): + # + # For symbols in G_e only. If we weren't supporting 1.5, + # could just use sym.startswith(). + # + return self._NULLABLE == sym[0:len(self._NULLABLE)] + + def skip(self, (lhs, rhs), pos=0): + n = len(rhs) + while pos < n: + if not self.isnullable(rhs[pos]): + break + pos = pos + 1 + return pos + + def makeState(self, state, sym): + assert sym is not None + # + # Compute \epsilon-kernel state's core and see if + # it exists already. + # + kitems = [] + for rule, pos in self.states[state].items: + lhs, rhs = rule + if rhs[pos:pos+1] == (sym,): + kitems.append((rule, self.skip(rule, pos+1))) + core = kitems + + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + return self.cores[tcore] + # + # Nope, doesn't exist. Compute it and the associated + # \epsilon-nonkernel state together; we'll need it right away. + # + k = self.cores[tcore] = len(self.states) + K, NK = _State(k, kitems), _State(k+1, []) + self.states[k] = K + predicted = {} + + edges = self.edges + rules = self.newrules + for X in K, NK: + worklist = X.items + for item in worklist: + rule, pos = item + lhs, rhs = rule + if pos == len(rhs): + X.complete.append(rule) + continue + + nextSym = rhs[pos] + key = (X.stateno, nextSym) + if not rules.has_key(nextSym): + if not edges.has_key(key): + edges[key] = None + X.T.append(nextSym) + else: + edges[key] = None + if not predicted.has_key(nextSym): + predicted[nextSym] = 1 + for prule in rules[nextSym]: + ppos = self.skip(prule) + new = (prule, ppos) + NK.items.append(new) + # + # Problem: we know K needs generating, but we + # don't yet know about NK. Can't commit anything + # regarding NK to self.edges until we're sure. 
Should + # we delay committing on both K and NK to avoid this + # hacky code? This creates other problems.. + # + if X is K: + edges = {} + + if NK.items == []: + return k + + # + # Check for \epsilon-nonkernel's core. Unfortunately we + # need to know the entire set of predicted nonterminals + # to do this without accidentally duplicating states. + # + core = predicted.keys() + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + self.edges[(k, None)] = self.cores[tcore] + return k + + nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno + self.edges.update(edges) + self.states[nk] = NK + return k + + def goto(self, state, sym): + key = (state, sym) + if not self.edges.has_key(key): + # + # No transitions from state on sym. + # + return None + + rv = self.edges[key] + if rv is None: + # + # Target state isn't generated yet. Remedy this. + # + rv = self.makeState(state, sym) + self.edges[key] = rv + return rv + + def gotoT(self, state, t): + return [self.goto(state, t)] + + def gotoST(self, state, st): + rv = [] + for t in self.states[state].T: + if st == t: + rv.append(self.goto(state, t)) + return rv + + def add(self, set, item, i=None, predecessor=None, causal=None): + if predecessor is None: + if item not in set: + set.append(item) + else: + key = (item, i) + if item not in set: + self.links[key] = [] + set.append(item) + self.links[key].append((predecessor, causal)) + + def makeSet(self, token, sets, i): + cur, next = sets[i], sets[i+1] + + ttype = token is not None and self.typestring(token) or None + if ttype is not None: + fn, arg = self.gotoT, ttype + else: + fn, arg = self.gotoST, token + + for item in cur: + ptr = (item, i) + state, parent = item + add = fn(state, arg) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + nk = self.goto(k, None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + k = self.goto(pstate, lhs) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + self.add(cur, (k, pparent), + i, pptr, why) + nk = self.goto(k, None) + if nk is not None: + self.add(cur, (nk, i)) + + def makeSet_fast(self, token, sets, i): + # + # Call *only* when the entire state machine has been built! + # It relies on self.edges being filled in completely, and + # then duplicates and inlines code to boost speed at the + # cost of extreme ugliness. 
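+ # __setstate__() installs this method as the instance's makeSet,
+ # so an unpickled parser (whose tables are already complete)
+ # takes this fast path automatically whenever parse() calls
+ # self.makeSet.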
+ # + cur, next = sets[i], sets[i+1] + ttype = token is not None and self.typestring(token) or None + + for item in cur: + ptr = (item, i) + state, parent = item + if ttype is not None: + k = self.edges.get((state, ttype), None) + if k is not None: + #self.add(next, (k, parent), i+1, ptr) + #INLINED --v + new = (k, parent) + key = (new, i+1) + if new not in next: + self.links[key] = [] + next.append(new) + self.links[key].append((ptr, None)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(next, (nk, i+1)) + #INLINED --v + new = (nk, i+1) + if new not in next: + next.append(new) + #INLINED --^ + else: + add = self.gotoST(state, token) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + #k = self.goto(pstate, lhs) + k = self.edges.get((pstate, lhs), None) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + #self.add(cur, (k, pparent), + # i, pptr, why) + #INLINED --v + new = (k, pparent) + key = (new, i) + if new not in cur: + self.links[key] = [] + cur.append(new) + self.links[key].append((pptr, why)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(cur, (nk, i)) + #INLINED --v + new = (nk, i) + if new not in cur: + cur.append(new) + #INLINED --^ + + def predecessor(self, key, causal): + for p, c in self.links[key]: + if c == causal: + return p + assert 0 + + def causal(self, key): + links = self.links[key] + if len(links) == 1: + return links[0][1] + choices = [] + rule2cause = {} + for p, c in links: + rule = c[2] + choices.append(rule) + rule2cause[rule] = c + return rule2cause[self.ambiguity(choices)] + + def deriveEpsilon(self, nt): + if len(self.newrules[nt]) > 1: + rule = self.ambiguity(self.newrules[nt]) + else: + rule = self.newrules[nt][0] + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + attr[i] = self.deriveEpsilon(rhs[i]) + return self.rule2func[self.new2old[rule]](attr) + + def buildTree(self, nt, item, tokens, k): + state, parent = item + + choices = [] + for rule in self.states[state].complete: + if rule[0] == nt: + choices.append(rule) + rule = choices[0] + if len(choices) > 1: + rule = self.ambiguity(choices) + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + sym = rhs[i] + if not self.newrules.has_key(sym): + if sym != self._BOF: + attr[i] = tokens[k-1] + key = (item, k) + item, k = self.predecessor(key, None) + #elif self.isnullable(sym): + elif self._NULLABLE == sym[0:len(self._NULLABLE)]: + attr[i] = self.deriveEpsilon(sym) + else: + key = (item, k) + why = self.causal(key) + attr[i] = self.buildTree(sym, why[0], + tokens, why[1]) + item, k = self.predecessor(key, why) + return self.rule2func[self.new2old[rule]](attr) + + def ambiguity(self, rules): + # + # XXX - problem here and in collectRules() if the same rule + # appears in >1 method. Also undefined results if rules + # causing the ambiguity appear in the same method. 
+ # + sortlist = [] + name2index = {} + for i in range(len(rules)): + lhs, rhs = rule = rules[i] + name = self.rule2name[self.new2old[rule]] + sortlist.append((len(rhs), name)) + name2index[name] = i + sortlist.sort() + list = map(lambda (a,b): b, sortlist) + return rules[name2index[self.resolve(list)]] + + def resolve(self, list): + # + # Resolve ambiguity in favor of the shortest RHS. + # Since we walk the tree from the top down, this + # should effectively resolve in favor of a "shift". + # + return list[0] + +# +# GenericASTBuilder automagically constructs a concrete/abstract syntax tree +# for a given input. The extra argument is a class (not an instance!) +# which supports the "__setslice__" and "__len__" methods. +# +# XXX - silently overrides any user code in methods. +# + +class GenericASTBuilder(GenericParser): + def __init__(self, AST, start): + GenericParser.__init__(self, start) + self.AST = AST + + def preprocess(self, rule, func): + rebind = lambda lhs, self=self: \ + lambda args, lhs=lhs, self=self: \ + self.buildASTNode(args, lhs) + lhs, rhs = rule + return rule, rebind(lhs) + + def buildASTNode(self, args, lhs): + children = [] + for arg in args: + if isinstance(arg, self.AST): + children.append(arg) + else: + children.append(self.terminal(arg)) + return self.nonterminal(lhs, children) + + def terminal(self, token): return token + + def nonterminal(self, type, args): + rv = self.AST(type) + rv[:len(args)] = args + return rv + +# +# GenericASTTraversal is a Visitor pattern according to Design Patterns. For +# each node it attempts to invoke the method n_<node type>, falling +# back onto the default() method if the n_* can't be found. The preorder +# traversal also looks for an exit hook named n_<node type>_exit (no default +# routine is called if it's not found). To prematurely halt traversal +# of a subtree, call the prune() method -- this only makes sense for a +# preorder traversal. Node type is determined via the typestring() method. +# + +class GenericASTTraversalPruningException: + pass + +class GenericASTTraversal: + def __init__(self, ast): + self.ast = ast + + def typestring(self, node): + return node.type + + def prune(self): + raise GenericASTTraversalPruningException + + def preorder(self, node=None): + if node is None: + node = self.ast + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + return + + for kid in node: + self.preorder(kid) + + name = name + '_exit' + if hasattr(self, name): + func = getattr(self, name) + func(node) + + def postorder(self, node=None): + if node is None: + node = self.ast + + for kid in node: + self.postorder(kid) + + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + + + def default(self, node): + pass + +# +# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" +# implemented. +# +# XXX - makes assumptions about how GenericParser walks the parse tree. 
+# + +class GenericASTMatcher(GenericParser): + def __init__(self, start, ast): + GenericParser.__init__(self, start) + self.ast = ast + + def preprocess(self, rule, func): + rebind = lambda func, self=self: \ + lambda args, func=func, self=self: \ + self.foundMatch(args, func) + lhs, rhs = rule + rhslist = list(rhs) + rhslist.reverse() + + return (lhs, tuple(rhslist)), rebind(func) + + def foundMatch(self, args, func): + func(args[-1]) + return args[-1] + + def match_r(self, node): + self.input.insert(0, node) + children = 0 + + for child in node: + if children == 0: + self.input.insert(0, '(') + children = children + 1 + self.match_r(child) + + if children > 0: + self.input.insert(0, ')') + + def match(self, ast=None): + if ast is None: + ast = self.ast + self.input = [] + + self.match_r(ast) + self.parse(self.input) + + def resolve(self, list): + # + # Resolve ambiguity in favor of the longest RHS. + # + return list[-1] + +def _dump(tokens, sets, states): + for i in range(len(sets)): + print 'set', i + for item in sets[i]: + print '\t', item + for (lhs, rhs), pos in states[item[0]].items: + print '\t\t', lhs, '::=', + print string.join(rhs[:pos]), + print '.', + print string.join(rhs[pos:]) + if i < len(tokens): + print + print 'token', str(tokens[i]) + print diff --git a/sys/src/cmd/python/Parser/tokenizer.c b/sys/src/cmd/python/Parser/tokenizer.c new file mode 100644 index 000000000..c58b6899b --- /dev/null +++ b/sys/src/cmd/python/Parser/tokenizer.c @@ -0,0 +1,1535 @@ + +/* Tokenizer implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include <ctype.h> +#include <assert.h> + +#include "tokenizer.h" +#include "errcode.h" + +#ifndef PGEN +#include "unicodeobject.h" +#include "stringobject.h" +#include "fileobject.h" +#include "codecs.h" +#include "abstract.h" +#endif /* PGEN */ + +extern char *PyOS_Readline(FILE *, FILE *, char *); +/* Return malloc'ed string including trailing \n; + empty malloc'ed string for EOF; + NULL if interrupted */ + +/* Don't ever change this -- it would break the portability of Python code */ +#define TABSIZE 8 + +/* Convert a possibly signed character to a nonnegative int */ +/* XXX This assumes characters are 8 bits wide */ +#ifdef __CHAR_UNSIGNED__ +#define Py_CHARMASK(c) (c) +#else +#define Py_CHARMASK(c) ((c) & 0xff) +#endif + +/* Forward */ +static struct tok_state *tok_new(void); +static int tok_nextc(struct tok_state *tok); +static void tok_backup(struct tok_state *tok, int c); + +/* Token names */ + +char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "BACKQUOTE", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + /* This table must match the #defines in token.h! 
*/ + "OP", + "<ERRORTOKEN>", + "<N_TOKENS>" +}; + + +/* Create and initialize a new tok_state structure */ + +static struct tok_state * +tok_new(void) +{ + struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( + sizeof(struct tok_state)); + if (tok == NULL) + return NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_OK; + tok->fp = NULL; + tok->tabsize = TABSIZE; + tok->indent = 0; + tok->indstack[0] = 0; + tok->atbol = 1; + tok->pendin = 0; + tok->prompt = tok->nextprompt = NULL; + tok->lineno = 0; + tok->level = 0; + tok->filename = NULL; + tok->altwarning = 0; + tok->alterror = 0; + tok->alttabsize = 1; + tok->altindstack[0] = 0; + tok->decoding_state = 0; + tok->decoding_erred = 0; + tok->read_coding_spec = 0; + tok->encoding = NULL; + tok->cont_line = 0; +#ifndef PGEN + tok->decoding_readline = NULL; + tok->decoding_buffer = NULL; +#endif + return tok; +} + +#ifdef PGEN + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + return fgets(s, size, tok->fp); +} + +static int +decoding_feof(struct tok_state *tok) +{ + return feof(tok->fp); +} + +static const char * +decode_str(const char *str, struct tok_state *tok) +{ + return str; +} + +#else /* PGEN */ + +static char * +error_ret(struct tok_state *tok) /* XXX */ +{ + tok->decoding_erred = 1; + if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ + PyMem_FREE(tok->buf); + tok->buf = NULL; + return NULL; /* as if it were EOF */ +} + +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + +static char * +get_normal_name(char *s) /* for utf-8 and latin-1 */ +{ + char buf[13]; + int i; + for (i = 0; i < 12; i++) { + int c = s[i]; + if (c == '\0') break; + else if (c == '_') buf[i] = '-'; + else buf[i] = tolower(c); + } + buf[i] = '\0'; + if (strcmp(buf, "utf-8") == 0 || + strncmp(buf, "utf-8-", 6) == 0) return "utf-8"; + else if (strcmp(buf, "latin-1") == 0 || + strcmp(buf, "iso-8859-1") == 0 || + strcmp(buf, "iso-latin-1") == 0 || + strncmp(buf, "latin-1-", 8) == 0 || + strncmp(buf, "iso-8859-1-", 11) == 0 || + strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1"; + else return s; +} + +/* Return the coding spec in S, or NULL if none is found. */ + +static char * +get_coding_spec(const char *s, Py_ssize_t size) +{ + Py_ssize_t i; + /* Coding spec must be in a comment, and that comment must be + * the only statement on the source code line. */ + for (i = 0; i < size - 6; i++) { + if (s[i] == '#') + break; + if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') + return NULL; + } + for (; i < size - 6; i++) { /* XXX inefficient search */ + const char* t = s + i; + if (strncmp(t, "coding", 6) == 0) { + const char* begin = NULL; + t += 6; + if (t[0] != ':' && t[0] != '=') + continue; + do { + t++; + } while (t[0] == '\x20' || t[0] == '\t'); + + begin = t; + while (isalnum(Py_CHARMASK(t[0])) || + t[0] == '-' || t[0] == '_' || t[0] == '.') + t++; + + if (begin < t) { + char* r = new_string(begin, t - begin); + char* q = get_normal_name(r); + if (r != q) { + PyMem_FREE(r); + r = new_string(q, strlen(q)); + } + return r; + } + } + } + return NULL; +} + +/* Check whether the line contains a coding spec. If it does, + invoke the set_readline function for the new encoding. + This function receives the tok_state and the new encoding. + Return 1 on success, 0 on failure. 
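+
+   Typical declarations recognized here (see PEP 263) are
+	# -*- coding: utf-8 -*-
+	# vim: set fileencoding=latin-1 :
+   i.e. a comment whose text matches "coding[:=]\s*([-\w.]+)".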
*/ + +static int +check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, + int set_readline(struct tok_state *, const char *)) +{ + char * cs; + int r = 1; + + if (tok->cont_line) + /* It's a continuation line, so it can't be a coding spec. */ + return 1; + cs = get_coding_spec(line, size); + if (cs != NULL) { + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == 1); /* raw */ + if (strcmp(cs, "utf-8") == 0 || + strcmp(cs, "iso-8859-1") == 0) { + tok->encoding = cs; + } else { +#ifdef Py_USING_UNICODE + r = set_readline(tok, cs); + if (r) { + tok->encoding = cs; + tok->decoding_state = -1; + } + else + PyMem_FREE(cs); +#else + /* Without Unicode support, we cannot + process the coding spec. Since there + won't be any Unicode literals, that + won't matter. */ + PyMem_FREE(cs); +#endif + } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + PyMem_FREE(cs); + } + } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } + return r; +} + +/* See whether the file starts with a BOM. If it does, + invoke the set_readline function with the new encoding. + Return 1 on success, 0 on failure. */ + +static int +check_bom(int get_char(struct tok_state *), + void unget_char(int, struct tok_state *), + int set_readline(struct tok_state *, const char *), + struct tok_state *tok) +{ + int ch = get_char(tok); + tok->decoding_state = 1; + if (ch == EOF) { + return 1; + } else if (ch == 0xEF) { + ch = get_char(tok); if (ch != 0xBB) goto NON_BOM; + ch = get_char(tok); if (ch != 0xBF) goto NON_BOM; +#if 0 + /* Disable support for UTF-16 BOMs until a decision + is made whether this needs to be supported. */ + } else if (ch == 0xFE) { + ch = get_char(tok); if (ch != 0xFF) goto NON_BOM; + if (!set_readline(tok, "utf-16-be")) return 0; + tok->decoding_state = -1; + } else if (ch == 0xFF) { + ch = get_char(tok); if (ch != 0xFE) goto NON_BOM; + if (!set_readline(tok, "utf-16-le")) return 0; + tok->decoding_state = -1; +#endif + } else { + unget_char(ch, tok); + return 1; + } + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ + return 1; + NON_BOM: + /* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */ + unget_char(0xFF, tok); /* XXX this will cause a syntax error */ + return 1; +} + +/* Read a line of text from TOK into S, using the stream in TOK. + Return NULL on failure, else S. + + On entry, tok->decoding_buffer will be one of: + 1) NULL: need to call tok->decoding_readline to get a new line + 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and + stored the result in tok->decoding_buffer + 3) PyStringObject *: previous call to fp_readl did not have enough room + (in the s buffer) to copy entire contents of the line read + by tok->decoding_readline. tok->decoding_buffer has the overflow. + In this case, fp_readl is called in a loop (with an expanded buffer) + until the buffer ends with a '\n' (or until the end of the file is + reached): see tok_nextc and its calls to decoding_fgets. +*/ + +static char * +fp_readl(char *s, int size, struct tok_state *tok) +{ +#ifndef Py_USING_UNICODE + /* In a non-Unicode built, this should never be called. 
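+   decoding_fgets() dispatches here only after a codec has been
+   installed through set_readline, which can only happen when
+   Py_USING_UNICODE is defined.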
*/ + Py_FatalError("fp_readl should not be called in this build."); + return NULL; /* Keep compiler happy (not reachable) */ +#else + PyObject* utf8 = NULL; + PyObject* buf = tok->decoding_buffer; + char *str; + Py_ssize_t utf8len; + + /* Ask for one less byte so we can terminate it */ + assert(size > 0); + size--; + + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) + return error_ret(tok); + } else { + tok->decoding_buffer = NULL; + if (PyString_CheckExact(buf)) + utf8 = buf; + } + if (utf8 == NULL) { + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + if (utf8 == NULL) + return error_ret(tok); + } + str = PyString_AsString(utf8); + utf8len = PyString_GET_SIZE(utf8); + if (utf8len > size) { + tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); + if (tok->decoding_buffer == NULL) { + Py_DECREF(utf8); + return error_ret(tok); + } + utf8len = size; + } + memcpy(s, str, utf8len); + s[utf8len] = '\0'; + Py_DECREF(utf8); + if (utf8len == 0) return NULL; /* EOF */ + return s; +#endif +} + +/* Set the readline function for TOK to a StreamReader's + readline function. The StreamReader is named ENC. + + This function is called from check_bom and check_coding_spec. + + ENC is usually identical to the future value of tok->encoding, + except for the (currently unsupported) case of UTF-16. + + Return 1 on success, 0 on failure. */ + +static int +fp_setreadl(struct tok_state *tok, const char* enc) +{ + PyObject *reader, *stream, *readline; + + /* XXX: constify filename argument. */ + stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); + if (stream == NULL) + return 0; + + reader = PyCodec_StreamReader(enc, stream, NULL); + Py_DECREF(stream); + if (reader == NULL) + return 0; + + readline = PyObject_GetAttrString(reader, "readline"); + Py_DECREF(reader); + if (readline == NULL) + return 0; + + tok->decoding_readline = readline; + return 1; +} + +/* Fetch the next byte from TOK. */ + +static int fp_getc(struct tok_state *tok) { + return getc(tok->fp); +} + +/* Unfetch the last byte back into TOK. */ + +static void fp_ungetc(int c, struct tok_state *tok) { + ungetc(c, tok->fp); +} + +/* Read a line of input from TOK. Determine encoding + if necessary. */ + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + char *line = NULL; + int badchar = 0; + for (;;) { + if (tok->decoding_state < 0) { + /* We already have a codec associated with + this input. */ + line = fp_readl(s, size, tok); + break; + } else if (tok->decoding_state > 0) { + /* We want a 'raw' read. */ + line = Py_UniversalNewlineFgets(s, size, + tok->fp, NULL); + break; + } else { + /* We have not yet determined the encoding. + If an encoding is found, use the file-pointer + reader functions from now on. */ + if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) + return error_ret(tok); + assert(tok->decoding_state != 0); + } + } + if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { + if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { + return error_ret(tok); + } + } +#ifndef PGEN + /* The default encoding is ASCII, so make sure we don't have any + non-ASCII bytes in it. */ + if (line && !tok->encoding) { + unsigned char *c; + for (c = (unsigned char *)line; *c; c++) + if (*c > 127) { + badchar = *c; + break; + } + } + if (badchar) { + char buf[500]; + /* Need to add 1 to the line number, since this line + has not been counted, yet. 
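+   (tok_nextc() increments tok->lineno only after this call
+   returns.)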
*/ + sprintf(buf, + "Non-ASCII character '\\x%.2x' " + "in file %.200s on line %i, " + "but no encoding declared; " + "see http://www.python.org/peps/pep-0263.html for details", + badchar, tok->filename, tok->lineno + 1); + PyErr_SetString(PyExc_SyntaxError, buf); + return error_ret(tok); + } +#endif + return line; +} + +static int +decoding_feof(struct tok_state *tok) +{ + if (tok->decoding_state >= 0) { + return feof(tok->fp); + } else { + PyObject* buf = tok->decoding_buffer; + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) { + error_ret(tok); + return 1; + } else { + tok->decoding_buffer = buf; + } + } + return PyObject_Length(buf) == 0; + } +} + +/* Fetch a byte from TOK, using the string buffer. */ + +static int +buf_getc(struct tok_state *tok) { + return Py_CHARMASK(*tok->str++); +} + +/* Unfetch a byte from TOK, using the string buffer. */ + +static void +buf_ungetc(int c, struct tok_state *tok) { + tok->str--; + assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ +} + +/* Set the readline function for TOK to ENC. For the string-based + tokenizer, this means to just record the encoding. */ + +static int +buf_setreadl(struct tok_state *tok, const char* enc) { + tok->enc = enc; + return 1; +} + +/* Return a UTF-8 encoding Python string object from the + C byte string STR, which is encoded with ENC. */ + +#ifdef Py_USING_UNICODE +static PyObject * +translate_into_utf8(const char* str, const char* enc) { + PyObject *utf8; + PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); + if (buf == NULL) + return NULL; + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + return utf8; +} +#endif + +/* Decode a byte string STR for use as the buffer of TOK. + Look for encoding declarations inside STR, and record them + inside TOK. */ + +static const char * +decode_str(const char *str, struct tok_state *tok) +{ + PyObject* utf8 = NULL; + const char *s; + int lineno = 0; + tok->enc = NULL; + tok->str = str; + if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) + return error_ret(tok); + str = tok->str; /* string after BOM if any */ + assert(str); +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyString_AsString(utf8); + } +#endif + for (s = str;; s++) { + if (*s == '\0') break; + else if (*s == '\n') { + lineno++; + if (lineno == 2) break; + } + } + tok->enc = NULL; + if (!check_coding_spec(str, s - str, tok, buf_setreadl)) + return error_ret(tok); +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + assert(utf8 == NULL); + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) { + PyErr_Format(PyExc_SyntaxError, + "unknown encoding: %s", tok->enc); + return error_ret(tok); + } + str = PyString_AsString(utf8); + } +#endif + assert(tok->decoding_buffer == NULL); + tok->decoding_buffer = utf8; /* CAUTION */ + return str; +} + +#endif /* PGEN */ + +/* Set up tokenizer for string */ + +struct tok_state * +PyTokenizer_FromString(const char *str) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + str = (char *)decode_str(str, tok); + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + /* XXX: constify members. 
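+   decode_str() may return either the caller's string or a buffer
+   owned by tok->decoding_buffer, hence the cast below.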
*/ + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + return tok; +} + + +/* Set up tokenizer for file */ + +struct tok_state * +PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = fp; + tok->prompt = ps1; + tok->nextprompt = ps2; + return tok; +} + + +/* Free a tok_state structure */ + +void +PyTokenizer_Free(struct tok_state *tok) +{ + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); +#ifndef PGEN + Py_XDECREF(tok->decoding_readline); + Py_XDECREF(tok->decoding_buffer); +#endif + if (tok->fp != NULL && tok->buf != NULL) + PyMem_FREE(tok->buf); + PyMem_FREE(tok); +} + +#if !defined(PGEN) && defined(Py_USING_UNICODE) +static int +tok_stdin_decode(struct tok_state *tok, char **inp) +{ + PyObject *enc, *sysstdin, *decoded, *utf8; + const char *encoding; + char *converted; + + if (PySys_GetFile((char *)"stdin", NULL) != stdin) + return 0; + sysstdin = PySys_GetObject("stdin"); + if (sysstdin == NULL || !PyFile_Check(sysstdin)) + return 0; + + enc = ((PyFileObject *)sysstdin)->f_encoding; + if (enc == NULL || !PyString_Check(enc)) + return 0; + Py_INCREF(enc); + + encoding = PyString_AsString(enc); + decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); + if (decoded == NULL) + goto error_clear; + + utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); + Py_DECREF(decoded); + if (utf8 == NULL) + goto error_clear; + + assert(PyString_Check(utf8)); + converted = new_string(PyString_AS_STRING(utf8), + PyString_GET_SIZE(utf8)); + Py_DECREF(utf8); + if (converted == NULL) + goto error_nomem; + + PyMem_FREE(*inp); + *inp = converted; + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string(encoding, strlen(encoding)); + if (tok->encoding == NULL) + goto error_nomem; + + Py_DECREF(enc); + return 0; + +error_nomem: + Py_DECREF(enc); + tok->done = E_NOMEM; + return -1; + +error_clear: + /* Fallback to iso-8859-1: for backward compatibility */ + Py_DECREF(enc); + PyErr_Clear(); + return 0; +} +#endif + +/* Get next char, updating state; error code goes into tok->done */ + +static int +tok_nextc(register struct tok_state *tok) +{ + for (;;) { + if (tok->cur != tok->inp) { + return Py_CHARMASK(*tok->cur++); /* Fast path */ + } + if (tok->done != E_OK) + return EOF; + if (tok->fp == NULL) { + char *end = strchr(tok->inp, '\n'); + if (end != NULL) + end++; + else { + end = strchr(tok->inp, '\0'); + if (end == tok->inp) { + tok->done = E_EOF; + return EOF; + } + } + if (tok->start == NULL) + tok->buf = tok->cur; + tok->line_start = tok->cur; + tok->lineno++; + tok->inp = end; + return Py_CHARMASK(*tok->cur++); + } + if (tok->prompt != NULL) { + char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); + if (tok->nextprompt != NULL) + tok->prompt = tok->nextprompt; + if (newtok == NULL) + tok->done = E_INTR; + else if (*newtok == '\0') { + PyMem_FREE(newtok); + tok->done = E_EOF; + } +#if !defined(PGEN) && defined(Py_USING_UNICODE) + else if (tok_stdin_decode(tok, &newtok) != 0) + PyMem_FREE(newtok); +#endif + else if (tok->start != NULL) { + size_t start = tok->start - tok->buf; + size_t oldlen = tok->cur - tok->buf; + size_t newlen = oldlen + strlen(newtok); + char *buf = tok->buf; + buf = (char *)PyMem_REALLOC(buf, newlen+1); + tok->lineno++; + if (buf == NULL) { + PyMem_FREE(tok->buf); + 
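+ /* Realloc failed: the old block was freed just above,
+    so clear the pointer to keep PyTokenizer_Free()
+    from freeing it a second time. */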
tok->buf = NULL; + PyMem_FREE(newtok); + tok->done = E_NOMEM; + return EOF; + } + tok->buf = buf; + tok->cur = tok->buf + oldlen; + tok->line_start = tok->cur; + strcpy(tok->buf + oldlen, newtok); + PyMem_FREE(newtok); + tok->inp = tok->buf + newlen; + tok->end = tok->inp + 1; + tok->start = tok->buf + start; + } + else { + tok->lineno++; + if (tok->buf != NULL) + PyMem_FREE(tok->buf); + tok->buf = newtok; + tok->line_start = tok->buf; + tok->cur = tok->buf; + tok->line_start = tok->buf; + tok->inp = strchr(tok->buf, '\0'); + tok->end = tok->inp + 1; + } + } + else { + int done = 0; + Py_ssize_t cur = 0; + char *pt; + if (tok->start == NULL) { + if (tok->buf == NULL) { + tok->buf = (char *) + PyMem_MALLOC(BUFSIZ); + if (tok->buf == NULL) { + tok->done = E_NOMEM; + return EOF; + } + tok->end = tok->buf + BUFSIZ; + } + if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), + tok) == NULL) { + tok->done = E_EOF; + done = 1; + } + else { + tok->done = E_OK; + tok->inp = strchr(tok->buf, '\0'); + done = tok->inp[-1] == '\n'; + } + } + else { + cur = tok->cur - tok->buf; + if (decoding_feof(tok)) { + tok->done = E_EOF; + done = 1; + } + else + tok->done = E_OK; + } + tok->lineno++; + /* Read until '\n' or EOF */ + while (!done) { + Py_ssize_t curstart = tok->start == NULL ? -1 : + tok->start - tok->buf; + Py_ssize_t curvalid = tok->inp - tok->buf; + Py_ssize_t newsize = curvalid + BUFSIZ; + char *newbuf = tok->buf; + newbuf = (char *)PyMem_REALLOC(newbuf, + newsize); + if (newbuf == NULL) { + tok->done = E_NOMEM; + tok->cur = tok->inp; + return EOF; + } + tok->buf = newbuf; + tok->inp = tok->buf + curvalid; + tok->end = tok->buf + newsize; + tok->start = curstart < 0 ? NULL : + tok->buf + curstart; + if (decoding_fgets(tok->inp, + (int)(tok->end - tok->inp), + tok) == NULL) { + /* Break out early on decoding + errors, as tok->buf will be NULL + */ + if (tok->decoding_erred) + return EOF; + /* Last line does not end in \n, + fake one */ + strcpy(tok->inp, "\n"); + } + tok->inp = strchr(tok->inp, '\0'); + done = tok->inp[-1] == '\n'; + } + if (tok->buf != NULL) { + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; + /* replace "\r\n" with "\n" */ + /* For Mac leave the \r, giving syntax error */ + pt = tok->inp - 2; + if (pt >= tok->buf && *pt == '\r') { + *pt++ = '\n'; + *pt = '\0'; + tok->inp = pt; + } + } + } + if (tok->done != E_OK) { + if (tok->prompt != NULL) + PySys_WriteStderr("\n"); + tok->cur = tok->inp; + return EOF; + } + } + /*NOTREACHED*/ +} + + +/* Back-up one character */ + +static void +tok_backup(register struct tok_state *tok, register int c) +{ + if (c != EOF) { + if (--tok->cur < tok->buf) + Py_FatalError("tok_backup: begin of buffer"); + if (*tok->cur != c) + *tok->cur = c; + } +} + + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c) +{ + switch (c) { + case '(': return LPAR; + case ')': return RPAR; + case '[': return LSQB; + case ']': return RSQB; + case ':': return COLON; + case ',': return COMMA; + case ';': return SEMI; + case '+': return PLUS; + case '-': return MINUS; + case '*': return STAR; + case '/': return SLASH; + case '|': return VBAR; + case '&': return AMPER; + case '<': return LESS; + case '>': return GREATER; + case '=': return EQUAL; + case '.': return DOT; + case '%': return PERCENT; + case '`': return BACKQUOTE; + case '{': return LBRACE; + case '}': return RBRACE; + case '^': return CIRCUMFLEX; + case '~': return TILDE; + case '@': return AT; + default: return OP; + } +} + + +int +PyToken_TwoChars(int c1, 
int c2) +{ + switch (c1) { + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '<': + switch (c2) { + case '>': return NOTEQUAL; + case '=': return LESSEQUAL; + case '<': return LEFTSHIFT; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + } + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ + switch (c1) { + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': + return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': + return RIGHTSHIFTEQUAL; + } + break; + } + break; + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': + return DOUBLESTAREQUAL; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': + return DOUBLESLASHEQUAL; + } + break; + } + break; + } + return OP; +} + +static int +indenterror(struct tok_state *tok) +{ + if (tok->alterror) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return 1; + } + if (tok->altwarning) { + PySys_WriteStderr("%s: inconsistent use of tabs and spaces " + "in indentation\n", tok->filename); + tok->altwarning = 0; + } + return 0; +} + + +/* Get next token, after space stripping etc. */ + +static int +tok_get(register struct tok_state *tok, char **p_start, char **p_end) +{ + register int c; + int blankline; + + *p_start = *p_end = NULL; + nextline: + tok->start = NULL; + blankline = 0; + + /* Get indentation level */ + if (tok->atbol) { + register int col = 0; + register int altcol = 0; + tok->atbol = 0; + for (;;) { + c = tok_nextc(tok); + if (c == ' ') + col++, altcol++; + else if (c == '\t') { + col = (col/tok->tabsize + 1) * tok->tabsize; + altcol = (altcol/tok->alttabsize + 1) + * tok->alttabsize; + } + else if (c == '\014') /* Control-L (formfeed) */ + col = altcol = 0; /* For Emacs users */ + else + break; + } + tok_backup(tok, c); + if (c == '#' || c == '\n') { + /* Lines with only whitespace and/or comments + shouldn't affect the indentation and are + not passed to the parser as NEWLINE tokens, + except *totally* empty lines in interactive + mode, which signal the end of a command group. 
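+   (An empty line entered at the interactive continuation
+   prompt is what closes an open indented block.)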
*/ + if (col == 0 && c == '\n' && tok->prompt != NULL) + blankline = 0; /* Let it through */ + else + blankline = 1; /* Ignore completely */ + /* We can't jump back right here since we still + may need to skip to the end of a comment */ + } + if (!blankline && tok->level == 0) { + if (col == tok->indstack[tok->indent]) { + /* No change */ + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + else if (col > tok->indstack[tok->indent]) { + /* Indent -- always one */ + if (tok->indent+1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol <= tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + tok->pendin++; + tok->indstack[++tok->indent] = col; + tok->altindstack[tok->indent] = altcol; + } + else /* col < tok->indstack[tok->indent] */ { + /* Dedent -- any number, must be consistent */ + while (tok->indent > 0 && + col < tok->indstack[tok->indent]) { + tok->pendin--; + tok->indent--; + } + if (col != tok->indstack[tok->indent]) { + tok->done = E_DEDENT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + } + } + + tok->start = tok->cur; + + /* Return pending indents/dedents */ + if (tok->pendin != 0) { + if (tok->pendin < 0) { + tok->pendin++; + return DEDENT; + } + else { + tok->pendin--; + return INDENT; + } + } + + again: + tok->start = NULL; + /* Skip spaces */ + do { + c = tok_nextc(tok); + } while (c == ' ' || c == '\t' || c == '\014'); + + /* Set start of current token */ + tok->start = tok->cur - 1; + + /* Skip comment, while looking for tab-setting magic */ + if (c == '#') { + static char *tabforms[] = { + "tab-width:", /* Emacs */ + ":tabstop=", /* vim, full form */ + ":ts=", /* vim, abbreviated form */ + "set tabsize=", /* will vi never die? */ + /* more templates can be added here to support other editors */ + }; + char cbuf[80]; + char *tp, **cp; + tp = cbuf; + do { + *tp++ = c = tok_nextc(tok); + } while (c != EOF && c != '\n' && + (size_t)(tp - cbuf + 1) < sizeof(cbuf)); + *tp = '\0'; + for (cp = tabforms; + cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); + cp++) { + if ((tp = strstr(cbuf, *cp))) { + int newsize = atoi(tp + strlen(*cp)); + + if (newsize >= 1 && newsize <= 40) { + tok->tabsize = newsize; + if (Py_VerboseFlag) + PySys_WriteStderr( + "Tab size set to %d\n", + newsize); + } + } + } + while (c != EOF && c != '\n') + c = tok_nextc(tok); + } + + /* Check for EOF and errors now */ + if (c == EOF) { + return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + } + + /* Identifier (most frequent token!) */ + if (isalpha(c) || c == '_') { + /* Process r"", u"" and ur"" */ + switch (c) { + case 'r': + case 'R': + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'u': + case 'U': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + } + while (isalnum(c) || c == '_') { + c = tok_nextc(tok); + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NAME; + } + + /* Newline */ + if (c == '\n') { + tok->atbol = 1; + if (blankline || tok->level > 0) + goto nextline; + *p_start = tok->start; + *p_end = tok->cur - 1; /* Leave '\n' out of the string */ + tok->cont_line = 0; + return NEWLINE; + } + + /* Period or number starting with period? 
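+   E.g. ".5" tokenizes as a single NUMBER, while the "." in
+   "x.y" comes back as a DOT token.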
*/ + if (c == '.') { + c = tok_nextc(tok); + if (isdigit(c)) { + goto fraction; + } + else { + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return DOT; + } + } + + /* Number */ + if (isdigit(c)) { + if (c == '0') { + /* Hex or octal -- maybe. */ + c = tok_nextc(tok); + if (c == '.') + goto fraction; +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + goto imaginary; +#endif + if (c == 'x' || c == 'X') { + /* Hex */ + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } + else { + int found_decimal = 0; + /* Octal; c is first char of it */ + /* There's no 'isoctdigit' macro, sigh */ + while ('0' <= c && c < '8') { + c = tok_nextc(tok); + } + if (isdigit(c)) { + found_decimal = 1; + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == '.') + goto fraction; + else if (c == 'e' || c == 'E') + goto exponent; +#ifndef WITHOUT_COMPLEX + else if (c == 'j' || c == 'J') + goto imaginary; +#endif + else if (found_decimal) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + } + else { + /* Decimal */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + else { + /* Accept floating point numbers. */ + if (c == '.') { + fraction: + /* Fraction */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == 'e' || c == 'E') { + exponent: + /* Exponent part */ + c = tok_nextc(tok); + if (c == '+' || c == '-') + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + /* Imaginary part */ + imaginary: + c = tok_nextc(tok); +#endif + } + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NUMBER; + } + + letter_quote: + /* String */ + if (c == '\'' || c == '"') { + Py_ssize_t quote2 = tok->cur - tok->start + 1; + int quote = c; + int triple = 0; + int tripcount = 0; + for (;;) { + c = tok_nextc(tok); + if (c == '\n') { + if (!triple) { + tok->done = E_EOLS; + tok_backup(tok, c); + return ERRORTOKEN; + } + tripcount = 0; + tok->cont_line = 1; /* multiline string. 
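+   (cont_line also tells check_coding_spec() to skip
+   such lines.)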
*/ + } + else if (c == EOF) { + if (triple) + tok->done = E_EOFS; + else + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + else if (c == quote) { + tripcount++; + if (tok->cur - tok->start == quote2) { + c = tok_nextc(tok); + if (c == quote) { + triple = 1; + tripcount = 0; + continue; + } + tok_backup(tok, c); + } + if (!triple || tripcount == 3) + break; + } + else if (c == '\\') { + tripcount = 0; + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + } + else + tripcount = 0; + } + *p_start = tok->start; + *p_end = tok->cur; + return STRING; + } + + /* Line continuation */ + if (c == '\\') { + c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->cont_line = 1; + goto again; /* Read next line */ + } + + /* Check for two-character token */ + { + int c2 = tok_nextc(tok); + int token = PyToken_TwoChars(c, c2); + if (token != OP) { + int c3 = tok_nextc(tok); + int token3 = PyToken_ThreeChars(c, c2, c3); + if (token3 != OP) { + token = token3; + } else { + tok_backup(tok, c3); + } + *p_start = tok->start; + *p_end = tok->cur; + return token; + } + tok_backup(tok, c2); + } + + /* Keep track of parentheses nesting level */ + switch (c) { + case '(': + case '[': + case '{': + tok->level++; + break; + case ')': + case ']': + case '}': + tok->level--; + break; + } + + /* Punctuation character */ + *p_start = tok->start; + *p_end = tok->cur; + return PyToken_OneChar(c); +} + +int +PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) +{ + int result = tok_get(tok, p_start, p_end); + if (tok->decoding_erred) { + result = ERRORTOKEN; + tok->done = E_DECODE; + } + return result; +} + +#ifdef Py_DEBUG + +void +tok_dump(int type, char *start, char *end) +{ + printf("%s", _PyParser_TokenNames[type]); + if (type == NAME || type == NUMBER || type == STRING || type == OP) + printf("(%.*s)", (int)(end - start), start); +} + +#endif diff --git a/sys/src/cmd/python/Parser/tokenizer.h b/sys/src/cmd/python/Parser/tokenizer.h new file mode 100644 index 000000000..5e7ebf74f --- /dev/null +++ b/sys/src/cmd/python/Parser/tokenizer.h @@ -0,0 +1,65 @@ +#ifndef Py_TOKENIZER_H +#define Py_TOKENIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +/* Tokenizer interface */ + +#include "token.h" /* For token types */ + +#define MAXINDENT 100 /* Max indentation level */ + +/* Tokenizer state */ +struct tok_state { + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! 
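+   Most error paths keep this invariant by assigning
+   tok->cur = tok->inp before returning.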
*/
+ FILE *fp; /* Rest of input; NULL if tokenizing a string */
+ int tabsize; /* Tab spacing */
+ int indent; /* Current indentation index */
+ int indstack[MAXINDENT]; /* Stack of indents */
+ int atbol; /* Nonzero if at begin of new line */
+ int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
+ char *prompt, *nextprompt; /* For interactive prompting */
+ int lineno; /* Current line number */
+ int level; /* () [] {} Parentheses nesting level */
+ /* Used to allow free continuations inside them */
+ /* Stuff for checking on different tab sizes */
+ const char *filename; /* For error messages */
+ int altwarning; /* Issue warning if alternate tabs don't match */
+ int alterror; /* Issue error if alternate tabs don't match */
+ int alttabsize; /* Alternate tab spacing */
+ int altindstack[MAXINDENT]; /* Stack of alternate indents */
+ /* Stuff for PEP 0263 */
+ int decoding_state; /* -1:decoding, 0:init, 1:raw */
+ int decoding_erred; /* whether erred in decoding */
+ int read_coding_spec; /* whether 'coding:...' has been read */
+ char *encoding;
+ int cont_line; /* whether we are in a continuation line. */
+ const char* line_start; /* pointer to start of current line */
+#ifndef PGEN
+ PyObject *decoding_readline; /* codecs.open(...).readline */
+ PyObject *decoding_buffer;
+#endif
+ const char* enc;
+ const char* str;
+};
+
+extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
+extern void PyTokenizer_Free(struct tok_state *);
+extern int PyTokenizer_Get(struct tok_state *, char **, char **);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_TOKENIZER_H */ diff --git a/sys/src/cmd/python/Parser/tokenizer_pgen.c b/sys/src/cmd/python/Parser/tokenizer_pgen.c new file mode 100644 index 000000000..9cb8492d6 --- /dev/null +++ b/sys/src/cmd/python/Parser/tokenizer_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "tokenizer.c"