author    Ori Bernstein <ori@eigenstate.org>    2021-06-14 00:00:37 +0000
committer Ori Bernstein <ori@eigenstate.org>    2021-06-14 00:00:37 +0000
commit    a73a964e51247ed169d322c725a3a18859f109a3 (patch)
tree      3f752d117274d444bda44e85609aeac1acf313f3 /sys/src/cmd/python/Doc/lib/librobotparser.tex
parent    e64efe273fcb921a61bf27d33b230c4e64fcd425 (diff)
python, hg: tow outside the environment.
they've served us well, and can ride off into the sunset.
Diffstat (limited to 'sys/src/cmd/python/Doc/lib/librobotparser.tex')
-rw-r--r--    sys/src/cmd/python/Doc/lib/librobotparser.tex    86
1 file changed, 0 insertions(+), 86 deletions(-)
diff --git a/sys/src/cmd/python/Doc/lib/librobotparser.tex b/sys/src/cmd/python/Doc/lib/librobotparser.tex
deleted file mode 100644
index 5eac5283e..000000000
--- a/sys/src/cmd/python/Doc/lib/librobotparser.tex
+++ /dev/null
@@ -1,86 +0,0 @@
-\section{\module{robotparser} ---
- Parser for robots.txt}
-
-\declaremodule{standard}{robotparser}
-\modulesynopsis{Loads a \protect\file{robots.txt} file and
- answers questions about fetchability of other URLs.}
-\sectionauthor{Skip Montanaro}{skip@mojam.com}
-
-\index{WWW}
-\index{World Wide Web}
-\index{URL}
-\index{robots.txt}
-
-This module provides a single class, \class{RobotFileParser}, which
-answers questions about whether a particular user agent may fetch a
-URL on the Web site that published the \file{robots.txt} file.  For
-more details on the structure of \file{robots.txt} files, see
-\url{http://www.robotstxt.org/wc/norobots.html}.
-
-\begin{classdesc}{RobotFileParser}{}
-
-This class provides a set of methods to read, parse, and answer
-questions about a single \file{robots.txt} file.
-
-\begin{methoddesc}{set_url}{url}
-Sets the URL referring to a \file{robots.txt} file.
-\end{methoddesc}
-
-\begin{methoddesc}{read}{}
-Reads the \file{robots.txt} URL and feeds it to the parser.
-\end{methoddesc}
-
-\begin{methoddesc}{parse}{lines}
-Parses the \var{lines} argument, a list of lines from a
-\file{robots.txt} file (see the second example below).
-\end{methoddesc}
-
-\begin{methoddesc}{can_fetch}{useragent, url}
-Returns \code{True} if the \var{useragent} is allowed to fetch the \var{url}
-according to the rules contained in the parsed \file{robots.txt} file.
-\end{methoddesc}
-
-\begin{methoddesc}{mtime}{}
-Returns the time the \file{robots.txt} file was last fetched.  This is
-useful for long-running web spiders that need to check for new
-\file{robots.txt} files periodically.
-\end{methoddesc}
-
-\begin{methoddesc}{modified}{}
-Sets the time the \file{robots.txt} file was last fetched to the current
-time.
-\end{methoddesc}
-
-\end{classdesc}
-
-The following example demonstrates basic use of the \class{RobotFileParser} class.
-
-\begin{verbatim}
->>> import robotparser
->>> rp = robotparser.RobotFileParser()
->>> rp.set_url("http://www.musi-cal.com/robots.txt")
->>> rp.read()
->>> rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco")
-False
->>> rp.can_fetch("*", "http://www.musi-cal.com/")
-True
-\end{verbatim}
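-
-A network fetch is not always necessary: \method{parse()} accepts a
-list of lines directly.  The following sketch (the \code{example.com}
-policy is hypothetical) feeds the parser an in-memory
-\file{robots.txt}, then uses \method{modified()} and \method{mtime()}
-to decide when a re-read is due, assuming a one-hour refresh policy:
-
-\begin{verbatim}
->>> import robotparser, time
->>> rp = robotparser.RobotFileParser()
->>> rp.parse("""User-agent: *
-... Disallow: /cgi-bin/
-... """.splitlines())
->>> rp.can_fetch("*", "http://example.com/cgi-bin/search")
-False
->>> rp.modified()                      # record the fetch time
->>> time.time() - rp.mtime() < 3600    # re-read once this turns False
-True
-\end{verbatim}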