diff options
author | Ori Bernstein <ori@eigenstate.org> | 2021-06-14 00:00:37 +0000 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2021-06-14 00:00:37 +0000 |
commit | a73a964e51247ed169d322c725a3a18859f109a3 (patch) | |
tree | 3f752d117274d444bda44e85609aeac1acf313f3 /sys/src/cmd/python/Tools/webchecker/websucker.py | |
parent | e64efe273fcb921a61bf27d33b230c4e64fcd425 (diff) |
python, hg: tow outside the environment.
they've served us well, and can ride off into the sunset.
Diffstat (limited to 'sys/src/cmd/python/Tools/webchecker/websucker.py')
-rwxr-xr-x | sys/src/cmd/python/Tools/webchecker/websucker.py | 125 |
1 files changed, 0 insertions, 125 deletions
```diff
diff --git a/sys/src/cmd/python/Tools/webchecker/websucker.py b/sys/src/cmd/python/Tools/webchecker/websucker.py
deleted file mode 100755
index fdbf28869..000000000
--- a/sys/src/cmd/python/Tools/webchecker/websucker.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#! /usr/bin/env python
-
-"""A variant on webchecker that creates a mirror copy of a remote site."""
-
-__version__ = "$Revision: 28654 $"
-
-import os
-import sys
-import urllib
-import getopt
-
-import webchecker
-
-# Extract real version number if necessary
-if __version__[0] == '$':
-    _v = __version__.split()
-    if len(_v) == 3:
-        __version__ = _v[1]
-
-def main():
-    verbose = webchecker.VERBOSE
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "qv")
-    except getopt.error, msg:
-        print msg
-        print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..."
-        return 2
-    for o, a in opts:
-        if o == "-q":
-            verbose = 0
-        if o == "-v":
-            verbose = verbose + 1
-    c = Sucker()
-    c.setflags(verbose=verbose)
-    c.urlopener.addheaders = [
-            ('User-agent', 'websucker/%s' % __version__),
-        ]
-    for arg in args:
-        print "Adding root", arg
-        c.addroot(arg)
-    print "Run..."
-    c.run()
-
-class Sucker(webchecker.Checker):
-
-    checkext = 0
-    nonames = 1
-
-    # SAM 11/13/99: in general, URLs are now URL pairs.
-    # Since we've suppressed name anchor checking,
-    # we can ignore the second dimension.
-
-    def readhtml(self, url_pair):
-        url = url_pair[0]
-        text = None
-        path = self.savefilename(url)
-        try:
-            f = open(path, "rb")
-        except IOError:
-            f = self.openpage(url_pair)
-            if f:
-                info = f.info()
-                nurl = f.geturl()
-                if nurl != url:
-                    url = nurl
-                    path = self.savefilename(url)
-                text = f.read()
-                f.close()
-                self.savefile(text, path)
-                if not self.checkforhtml(info, url):
-                    text = None
-        else:
-            if self.checkforhtml({}, url):
-                text = f.read()
-            f.close()
-        return text, url
-
-    def savefile(self, text, path):
-        dir, base = os.path.split(path)
-        makedirs(dir)
-        try:
-            f = open(path, "wb")
-            f.write(text)
-            f.close()
-            self.message("saved %s", path)
-        except IOError, msg:
-            self.message("didn't save %s: %s", path, str(msg))
-
-    def savefilename(self, url):
-        type, rest = urllib.splittype(url)
-        host, path = urllib.splithost(rest)
-        path = path.lstrip("/")
-        user, host = urllib.splituser(host)
-        host, port = urllib.splitnport(host)
-        host = host.lower()
-        if not path or path[-1] == "/":
-            path = path + "index.html"
-        if os.sep != "/":
-            path = os.sep.join(path.split("/"))
-            if os.name == "mac":
-                path = os.sep + path
-        path = os.path.join(host, path)
-        return path
-
-def makedirs(dir):
-    if not dir:
-        return
-    if os.path.exists(dir):
-        if not os.path.isdir(dir):
-            try:
-                os.rename(dir, dir + ".bak")
-                os.mkdir(dir)
-                os.rename(dir + ".bak", os.path.join(dir, "index.html"))
-            except os.error:
-                pass
-        return
-    head, tail = os.path.split(dir)
-    if not tail:
-        print "Huh? Don't know how to make dir", dir
-        return
-    makedirs(head)
-    os.mkdir(dir, 0777)
-
-if __name__ == '__main__':
-    sys.exit(main() or 0)
```