From a73a964e51247ed169d322c725a3a18859f109a3 Mon Sep 17 00:00:00 2001 From: Ori Bernstein Date: Mon, 14 Jun 2021 00:00:37 +0000 Subject: python, hg: tow outside the environment. they've served us well, and can ride off into the sunset. --- sys/lib/python/xmllib.py | 929 ----------------------------------------------- 1 file changed, 929 deletions(-) delete mode 100644 sys/lib/python/xmllib.py (limited to 'sys/lib/python/xmllib.py') diff --git a/sys/lib/python/xmllib.py b/sys/lib/python/xmllib.py deleted file mode 100644 index 2a189cdd8..000000000 --- a/sys/lib/python/xmllib.py +++ /dev/null @@ -1,929 +0,0 @@ -"""A parser for XML, using the derived class as static DTD.""" - -# Author: Sjoerd Mullender. - -import re -import string - -import warnings -warnings.warn("The xmllib module is obsolete. Use xml.sax instead.", DeprecationWarning) -del warnings - -version = '0.3' - -class Error(RuntimeError): - pass - -# Regular expressions used for parsing - -_S = '[ \t\r\n]+' # white space -_opS = '[ \t\r\n]*' # optional white space -_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*' # valid XML name -_QStr = "(?:'[^']*'|\"[^\"]*\")" # quoted XML string -illegal = re.compile('[^\t\r\n -\176\240-\377]') # illegal chars in content -interesting = re.compile('[]&<]') - -amp = re.compile('&') -ref = re.compile('&(' + _Name + '|#[0-9]+|#x[0-9a-fA-F]+)[^-a-zA-Z0-9._:]') -entityref = re.compile('&(?P' + _Name + ')[^-a-zA-Z0-9._:]') -charref = re.compile('&#(?P[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])') -space = re.compile(_S + '$') -newline = re.compile('\n') - -attrfind = re.compile( - _S + '(?P' + _Name + ')' - '(' + _opS + '=' + _opS + - '(?P'+_QStr+'|[-a-zA-Z0-9.:+*%?!\(\)_#=~]+))?') -starttagopen = re.compile('<' + _Name) -starttagend = re.compile(_opS + '(?P/?)>') -starttagmatch = re.compile('<(?P'+_Name+')' - '(?P(?:'+attrfind.pattern+')*)'+ - starttagend.pattern) -endtagopen = re.compile('') -endbracketfind = re.compile('(?:[^>\'"]|'+_QStr+')*>') -tagfind = re.compile(_Name) -cdataopen = re.compile(r'') -# this matches one of the following: -# SYSTEM SystemLiteral -# PUBLIC PubidLiteral SystemLiteral -_SystemLiteral = '(?P<%s>'+_QStr+')' -_PublicLiteral = '(?P<%s>"[-\'\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*"|' \ - "'[-\(\)+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*')" -_ExternalId = '(?:SYSTEM|' \ - 'PUBLIC'+_S+_PublicLiteral%'pubid'+ \ - ')'+_S+_SystemLiteral%'syslit' -doctype = re.compile(''+_Name+')' - '(?:'+_S+_ExternalId+')?'+_opS) -xmldecl = re.compile('<\?xml'+_S+ - 'version'+_opS+'='+_opS+'(?P'+_QStr+')'+ - '(?:'+_S+'encoding'+_opS+'='+_opS+ - "(?P'[A-Za-z][-A-Za-z0-9._]*'|" - '"[A-Za-z][-A-Za-z0-9._]*"))?' - '(?:'+_S+'standalone'+_opS+'='+_opS+ - '(?P\'(?:yes|no)\'|"(?:yes|no)"))?'+ - _opS+'\?>') -procopen = re.compile(r'<\?(?P' + _Name + ')' + _opS) -procclose = re.compile(_opS + r'\?>') -commentopen = re.compile('') -doubledash = re.compile('--') -attrtrans = string.maketrans(' \r\n\t', ' ') - -# definitions for XML namespaces -_NCName = '[a-zA-Z_][-a-zA-Z0-9._]*' # XML Name, minus the ":" -ncname = re.compile(_NCName + '$') -qname = re.compile('(?:(?P' + _NCName + '):)?' # optional prefix - '(?P' + _NCName + ')$') - -xmlns = re.compile('xmlns(?::(?P'+_NCName+'))?$') - -# XML parser base class -- find tags and call handler functions. -# Usage: p = XMLParser(); p.feed(data); ...; p.close(). -# The dtd is defined by deriving a class which defines methods with -# special names to handle tags: start_foo and end_foo to handle -# and , respectively. The data between tags is passed to the -# parser by calling self.handle_data() with some data as argument (the -# data may be split up in arbitrary chunks). - -class XMLParser: - attributes = {} # default, to be overridden - elements = {} # default, to be overridden - - # parsing options, settable using keyword args in __init__ - __accept_unquoted_attributes = 0 - __accept_missing_endtag_name = 0 - __map_case = 0 - __accept_utf8 = 0 - __translate_attribute_references = 1 - - # Interface -- initialize and reset this instance - def __init__(self, **kw): - self.__fixed = 0 - if 'accept_unquoted_attributes' in kw: - self.__accept_unquoted_attributes = kw['accept_unquoted_attributes'] - if 'accept_missing_endtag_name' in kw: - self.__accept_missing_endtag_name = kw['accept_missing_endtag_name'] - if 'map_case' in kw: - self.__map_case = kw['map_case'] - if 'accept_utf8' in kw: - self.__accept_utf8 = kw['accept_utf8'] - if 'translate_attribute_references' in kw: - self.__translate_attribute_references = kw['translate_attribute_references'] - self.reset() - - def __fixelements(self): - self.__fixed = 1 - self.elements = {} - self.__fixdict(self.__dict__) - self.__fixclass(self.__class__) - - def __fixclass(self, kl): - self.__fixdict(kl.__dict__) - for k in kl.__bases__: - self.__fixclass(k) - - def __fixdict(self, dict): - for key in dict.keys(): - if key[:6] == 'start_': - tag = key[6:] - start, end = self.elements.get(tag, (None, None)) - if start is None: - self.elements[tag] = getattr(self, key), end - elif key[:4] == 'end_': - tag = key[4:] - start, end = self.elements.get(tag, (None, None)) - if end is None: - self.elements[tag] = start, getattr(self, key) - - # Interface -- reset this instance. Loses all unprocessed data - def reset(self): - self.rawdata = '' - self.stack = [] - self.nomoretags = 0 - self.literal = 0 - self.lineno = 1 - self.__at_start = 1 - self.__seen_doctype = None - self.__seen_starttag = 0 - self.__use_namespaces = 0 - self.__namespaces = {'xml':None} # xml is implicitly declared - # backward compatibility hack: if elements not overridden, - # fill it in ourselves - if self.elements is XMLParser.elements: - self.__fixelements() - - # For derived classes only -- enter literal mode (CDATA) till EOF - def setnomoretags(self): - self.nomoretags = self.literal = 1 - - # For derived classes only -- enter literal mode (CDATA) - def setliteral(self, *args): - self.literal = 1 - - # Interface -- feed some data to the parser. Call this as - # often as you want, with as little or as much text as you - # want (may include '\n'). (This just saves the text, all the - # processing is done by goahead().) - def feed(self, data): - self.rawdata = self.rawdata + data - self.goahead(0) - - # Interface -- handle the remaining data - def close(self): - self.goahead(1) - if self.__fixed: - self.__fixed = 0 - # remove self.elements so that we don't leak - del self.elements - - # Interface -- translate references - def translate_references(self, data, all = 1): - if not self.__translate_attribute_references: - return data - i = 0 - while 1: - res = amp.search(data, i) - if res is None: - return data - s = res.start(0) - res = ref.match(data, s) - if res is None: - self.syntax_error("bogus `&'") - i = s+1 - continue - i = res.end(0) - str = res.group(1) - rescan = 0 - if str[0] == '#': - if str[1] == 'x': - str = chr(int(str[2:], 16)) - else: - str = chr(int(str[1:])) - if data[i - 1] != ';': - self.syntax_error("`;' missing after char reference") - i = i-1 - elif all: - if str in self.entitydefs: - str = self.entitydefs[str] - rescan = 1 - elif data[i - 1] != ';': - self.syntax_error("bogus `&'") - i = s + 1 # just past the & - continue - else: - self.syntax_error("reference to unknown entity `&%s;'" % str) - str = '&' + str + ';' - elif data[i - 1] != ';': - self.syntax_error("bogus `&'") - i = s + 1 # just past the & - continue - - # when we get here, str contains the translated text and i points - # to the end of the string that is to be replaced - data = data[:s] + str + data[i:] - if rescan: - i = s - else: - i = s + len(str) - - # Interface - return a dictionary of all namespaces currently valid - def getnamespace(self): - nsdict = {} - for t, d, nst in self.stack: - nsdict.update(d) - return nsdict - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. - def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - if i > 0: - self.__at_start = 0 - if self.nomoretags: - data = rawdata[i:n] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = n - break - res = interesting.search(rawdata, i) - if res: - j = res.start(0) - else: - j = n - if i < j: - data = rawdata[i:j] - if self.__at_start and space.match(data) is None: - self.syntax_error('illegal data at start of file') - self.__at_start = 0 - if not self.stack and space.match(data) is None: - self.syntax_error('data not in content') - if not self.__accept_utf8 and illegal.search(data): - self.syntax_error('illegal character in content') - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = j - if i == n: break - if rawdata[i] == '<': - if starttagopen.match(rawdata, i): - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - k = self.parse_starttag(i) - if k < 0: break - self.__seen_starttag = 1 - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if endtagopen.match(rawdata, i): - k = self.parse_endtag(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if commentopen.match(rawdata, i): - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - k = self.parse_comment(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - if cdataopen.match(rawdata, i): - k = self.parse_cdata(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - res = xmldecl.match(rawdata, i) - if res: - if not self.__at_start: - self.syntax_error(" declaration not at start of document") - version, encoding, standalone = res.group('version', - 'encoding', - 'standalone') - if version[1:-1] != '1.0': - raise Error('only XML version 1.0 supported') - if encoding: encoding = encoding[1:-1] - if standalone: standalone = standalone[1:-1] - self.handle_xml(encoding, standalone) - i = res.end(0) - continue - res = procopen.match(rawdata, i) - if res: - k = self.parse_proc(i) - if k < 0: break - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - res = doctype.match(rawdata, i) - if res: - if self.literal: - data = rawdata[i] - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - i = i+1 - continue - if self.__seen_doctype: - self.syntax_error('multiple DOCTYPE elements') - if self.__seen_starttag: - self.syntax_error('DOCTYPE not at beginning of document') - k = self.parse_doctype(res) - if k < 0: break - self.__seen_doctype = res.group('name') - if self.__map_case: - self.__seen_doctype = self.__seen_doctype.lower() - self.lineno = self.lineno + rawdata[i:k].count('\n') - i = k - continue - elif rawdata[i] == '&': - if self.literal: - data = rawdata[i] - self.handle_data(data) - i = i+1 - continue - res = charref.match(rawdata, i) - if res is not None: - i = res.end(0) - if rawdata[i-1] != ';': - self.syntax_error("`;' missing in charref") - i = i-1 - if not self.stack: - self.syntax_error('data not in content') - self.handle_charref(res.group('char')[:-1]) - self.lineno = self.lineno + res.group(0).count('\n') - continue - res = entityref.match(rawdata, i) - if res is not None: - i = res.end(0) - if rawdata[i-1] != ';': - self.syntax_error("`;' missing in entityref") - i = i-1 - name = res.group('name') - if self.__map_case: - name = name.lower() - if name in self.entitydefs: - self.rawdata = rawdata = rawdata[:res.start(0)] + self.entitydefs[name] + rawdata[i:] - n = len(rawdata) - i = res.start(0) - else: - self.unknown_entityref(name) - self.lineno = self.lineno + res.group(0).count('\n') - continue - elif rawdata[i] == ']': - if self.literal: - data = rawdata[i] - self.handle_data(data) - i = i+1 - continue - if n-i < 3: - break - if cdataclose.match(rawdata, i): - self.syntax_error("bogus `]]>'") - self.handle_data(rawdata[i]) - i = i+1 - continue - else: - raise Error('neither < nor & ??') - # We get here only if incomplete matches but - # nothing else - break - # end while - if i > 0: - self.__at_start = 0 - if end and i < n: - data = rawdata[i] - self.syntax_error("bogus `%s'" % data) - if not self.__accept_utf8 and illegal.search(data): - self.syntax_error('illegal character in content') - self.handle_data(data) - self.lineno = self.lineno + data.count('\n') - self.rawdata = rawdata[i+1:] - return self.goahead(end) - self.rawdata = rawdata[i:] - if end: - if not self.__seen_starttag: - self.syntax_error('no elements in file') - if self.stack: - self.syntax_error('missing end tags') - while self.stack: - self.finish_endtag(self.stack[-1][0]) - - # Internal -- parse comment, return length or -1 if not terminated - def parse_comment(self, i): - rawdata = self.rawdata - if rawdata[i:i+4] != '