# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""HyperText Transfer Protocol implementation.
The second coming.
Future Plans:
- HTTP client support will at some point be refactored to support HTTP/1.1.
- Accept chunked data from clients in server.
- Other missing HTTP features from the RFC.
Maintainer: U{James Y Knight <mailto:foom@fuhm.net>}
"""
# system imports
from cStringIO import StringIO
import tempfile
import base64, binascii
import cgi
import socket
import math
import time
import calendar
import warnings
import os
# twisted imports
from twisted.internet import interfaces, reactor, protocol, address
from twisted.protocols import policies, basic
from twisted.python import log, components
try: # try importing the fast, C version
from twisted.protocols._c_urlarg import unquote
except ImportError:
from urllib import unquote
# sibling imports
import responsecode
# HTTP protocol version string for this module (module-level constant).
protocol_version = "HTTP/1.1"
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, unquote=unquote):
    """Parse a query string, like cgi.parse_qs, with a pluggable unquote.

    Pairs are separated by either "&" or ";". Each "name=value" pair has
    "+" turned into a space and is then passed through C{unquote}.

    @param qs: the raw query string.
    @param keep_blank_values: if true, pairs whose value is empty are kept.
    @param strict_parsing: if true, a pair without "=" raises ValueError;
        otherwise such pairs are silently skipped.
    @param unquote: callable used to percent-decode names and values.
    @returns: dict mapping each name to the list of its values, in order.
    """
    parsed = {}
    for amp_part in qs.split("&"):
        for pair in amp_part.split(";"):
            try:
                name, value = pair.split("=", 1)
            except ValueError:
                # no "=" in this pair
                if strict_parsing:
                    raise
                continue
            if not (value or keep_blank_values):
                continue
            name = unquote(name.replace("+", " "))
            value = unquote(value.replace("+", " "))
            parsed.setdefault(name, []).append(value)
    return parsed
def toChunk(data):
    """Wrap a string as a single HTTP chunk.

    @returns: a 3-tuple of strings (hexadecimal length line, the data
        itself, trailing CRLF) that together form the chunked encoding
        of data.
    """
    size_line = "%x\r\n" % len(data)
    return (size_line, data, "\r\n")
def fromChunk(data):
    """Convert chunk to string.

    Decodes the single chunk at the start of C{data}. The size line may
    carry chunk extensions (";name=value"), which are ignored.

    @param data: string starting with "<hex size>[;ext]\\r\\n".
    @returns: tuple (result, remaining) - the chunk payload and whatever
        follows the chunk's closing CRLF.
    @raise ValueError: if there is no size line, the size is not a
        non-negative hexadecimal number, or the payload is not followed
        by CRLF (this includes a chunk that has not fully arrived yet).
    """
    prefix, rest = data.split('\r\n', 1)
    # Anything after ';' on the size line is a chunk extension; skip it.
    size_field = prefix.split(';', 1)[0]
    length = int(size_field, 16)
    if length < 0:
        raise ValueError("chunk length must not be negative")
    if rest[length:length + 2] != '\r\n':
        raise ValueError("chunk must end with CRLF")
    return rest[:length], rest[length + 2:]
class StringTransport:
    """
    In-memory stand-in for a transport, backed by a StringIO buffer.

    Adds the 'writeSequence' method; every other attribute lookup
    (write, getvalue, ...) is forwarded to the underlying buffer.
    """

    def __init__(self):
        self.s = StringIO()

    def writeSequence(self, seq):
        """Write all strings of seq to the buffer as one piece."""
        joined = ''.join(seq)
        self.s.write(joined)

    def __getattr__(self, attr):
        # Only reached for names not found on the instance; go through
        # __dict__ so a lookup of 's' itself cannot recurse.
        wrapped = self.__dict__['s']
        return getattr(wrapped, attr)
# response codes that must have empty bodies
# (Request._initialWrite skips chunked encoding and disables write() for these)
NO_BODY_CODES = (204, 304)
class Request:
    """An HTTP request.

    Subclasses should override the process() method to determine how
    the request will be processed.

    @ivar method: The HTTP method that was used.
    @ivar uri: The full URI that was requested (includes arguments).
    @ivar path: The path only (arguments not included).
    @ivar args: All of the arguments, including URL and POST arguments.
    @type args: A mapping of strings (the argument names) to lists of values.
                i.e., ?foo=bar&foo=baz&quux=spam results in
                {'foo': ['bar', 'baz'], 'quux': ['spam']}.
    @ivar received_headers: All received headers
    """

    # NOTE(review): this class is mid-refactoring. The following names are
    # used below but are neither defined in this class nor imported by this
    # module: self.headers, self.received_headers, self.getReqHeader,
    # self.getReqHeaderRaw, self.getRespHeader, self.setRespHeader and the
    # http_headers module. __init__ creates self.in_headers/self.out_headers
    # instead. Reconcile before relying on those code paths.

    __implements__ = interfaces.IConsumer,

    producer = None
    finished = 0
    code = responsecode.OK
    code_message = responsecode.RESPONSES[responsecode.OK]
    startedWriting = 0
    chunked = 0
    sentLength = 0  # content-length of response, or total bytes sent via chunking
    _forceSSL = False  # overridden by setHost(); read by isSecure()

    def __init__(self, channel, queued, command, path, version, raw_in_headers):
        """
        @param channel: the channel we're connected to.
        @param queued: are we in the request queue, or can we start writing to
            the transport?
        @param command: the HTTP method from the request line.
        @param path: the request URI from the request line.
        @param version: the protocol version from the request line.
        @param raw_in_headers: the raw request headers collected by the channel.
        """
        self.channel = channel
        self.queued = queued
        self.method = command
        self.uri = path
        self.clientproto = version

        self.out_headers = http_headers.ResponseHeaders()
        self.in_headers = http_headers.RequestHeaders(raw_in_headers)

        # While queued behind an earlier request, buffer output in memory.
        if queued:
            self.transport = StringTransport()
        else:
            self.transport = self.channel.transport

    def _cleanup(self):
        """Called when have finished responding and are no longer queued."""
        if self.producer:
            log.err(RuntimeError("Producer was not unregistered for %s" % self.uri))
            self.unregisterProducer()
        self.channel.requestDone(self)
        del self.channel

    # methods for channel - end users should not use these

    def noLongerQueued(self):
        """Notify the object that it is no longer queued.

        We start writing whatever data we have to the transport, etc.

        This method is not intended for users.

        @raise RuntimeError: if the request was not queued.
        """
        if not self.queued:
            raise RuntimeError("noLongerQueued() got called unnecessarily.")

        self.queued = 0

        # set transport to real one and send any buffer data
        data = self.transport.getvalue()
        self.transport = self.channel.transport
        if data:
            self.transport.write(data)

        # if we have producer, register it with transport
        if (self.producer is not None) and not self.finished:
            self.transport.registerProducer(self.producer, True)

        # if we're finished, clean up
        if self.finished:
            self._cleanup()

    def handleContentChunk(self, data):
        """Called by channel when a piece of data has been received.

        Should be overridden by a subclass to do something appropriate."""
        pass

    def handleContentComplete(self):
        """Called by channel when all data has been received.

        Splits the URI into path and query arguments and caches the
        peer and host addresses of the channel's transport.

        This method is not intended for users.
        """
        self.args = {}
        self.stack = []

        x = self.uri.split('?')
        if len(x) == 1:
            self.path = self.uri
        else:
            if len(x) != 2:
                log.msg("May ignore parts of this invalid URI: %s"
                        % repr(self.uri))
            self.path, argstring = x[0], x[1]
            self.args = parse_qs(argstring, 1)

        # cache the client and server information, we'll need this later to be
        # serialized and sent with the request so CGIs will work remotely
        self.client = self.channel.transport.getPeer()
        self.host = self.channel.transport.getHost()

    def __repr__(self):
        return '<%s %s %s>' % (self.method, self.uri, self.clientproto)

    # consumer interface

    def registerProducer(self, producer, streaming=True):
        """Register a producer.

        Only streaming (push) producers supported.

        @raise ValueError: for a pull producer, or if a producer is
            already registered.
        """
        if not streaming:
            raise ValueError("non-streaming (pull) producers not supported.")

        if self.producer:
            raise ValueError("registering producer %s before previous one (%s) was unregistered" % (producer, self.producer))

        self.producer = producer

        if self.queued:
            # output is only buffered for now; hold the producer back
            producer.pauseProducing()
        else:
            self.transport.registerProducer(producer, True)

    def unregisterProducer(self):
        """Unregister the producer."""
        if not self.queued:
            self.transport.unregisterProducer()
        self.producer = None

    # private http response methods

    def _sendError(self, code, resp=''):
        self.transport.write('%s %s %s\r\n\r\n' % (self.clientproto, code, resp))

    # The following is the public interface that people should be
    # writing to.

    def finish(self):
        """We are finished writing data."""
        if self.finished:
            warnings.warn("Warning! request.finish called twice.", stacklevel=2)
            return

        if not self.startedWriting:
            # write headers
            self.write('')

        # debug code just in case I made a mistake
        if self.chunked and self.code in NO_BODY_CODES:
            raise RuntimeError("we screwed up")

        if self.chunked:
            # write last chunk and closing CRLF
            self.transport.write("0\r\n\r\n")

        # log request
        if hasattr(self.channel, "factory"):
            self.channel.factory.log(self)

        self.finished = 1
        if not self.queued:
            self._cleanup()

    def _initialWrite(self):
        """Write the status line and headers.

        @return: True if a body may follow, False if body data must be
            discarded (HEAD requests and NO_BODY_CODES responses); in the
            latter case self.write is replaced by a no-op.
        """
        self.startedWriting = 1
        version = self.clientproto
        l = []
        l.append('%s %s %s\r\n' % (version, self.code,
                                   self.code_message))
        # if we don't have a content length, we send data in
        # chunked mode, so that we can support pipelining in
        # persistent connections.
        if ((version == "HTTP/1.1") and
            (self.headers.get('content-length', None) is None) and
            (self.code not in NO_BODY_CODES)):
            l.append("%s: %s\r\n" % ('Transfer-encoding', 'chunked'))
            self.chunked = 1
        for name, value in self.headers.items():
            l.append("%s: %s\r\n" % (name.capitalize(), value))
        l.append("\r\n")

        self.transport.writeSequence(l)

        # if this is a "HEAD" request, we shouldn't return any data
        if self.method == "HEAD":
            self.write = lambda data: None
            return False

        # for certain result codes, we should never return any data
        if self.code in NO_BODY_CODES:
            self.write = lambda data: None
            return False

        return True

    def write(self, data):
        """
        Write some data as a result of an HTTP request.  The first
        time this is called, it writes out response data.
        """
        if not self.startedWriting:
            if not self._initialWrite():
                return

        self.sentLength = self.sentLength + len(data)
        if data:
            if self.chunked:
                self.transport.writeSequence(toChunk(data))
            else:
                self.transport.write(data)

    # FIXME: usefulize this
    def writeFile(self, file):
        """
        Write data from a file, possibly more efficiently than write(data)
        would do. Otherwise identical to write(file.read()).
        """
        self.write(file.read())

    def setResponseCode(self, code, message=None):
        """Set the HTTP response code.

        @param code: numeric status code.
        @param message: reason phrase; when omitted, looked up in
            responsecode.RESPONSES ("Unknown Status" if absent).
        """
        self.code = code
        if message:
            self.code_message = message
        else:
            self.code_message = responsecode.RESPONSES.get(code, "Unknown Status")

    def setHeader(self, k, v):
        """Set an outgoing HTTP header.
        """
        self.headers[k.lower()] = v

    def redirect(self, url):
        """Utility function that does a redirect.

        The request should have finish() called after this.
        """
        self.setResponseCode(responsecode.FOUND)
        self.setHeader("location", url)

    def setLastModified(self, when):
        """Set the X{Last-Modified} time for the response to this request.

        If I am called more than once, I ignore attempts to set
        Last-Modified earlier, only replacing the Last-Modified time
        if it is to a later value.

        @param when: The last time the resource being returned was
            modified, in seconds since the epoch.
        @type when: number
        """
        # time.time() may be a float, but the HTTP-date strings are
        # only good for whole seconds.
        when = int(math.ceil(when))
        lastModified = self.getRespHeader('Last-Modified')
        if not lastModified or (lastModified < when):
            self.setRespHeader('Last-Modified', when)

    def checkBody(self):
        """Check to see if this request should have a body. As a side-effect
        may modify my response code to L{NOT_MODIFIED} or L{PRECONDITION_FAILED},
        if appropriate.

        Call this function after setting the ETag and Last-Modified
        output headers, but before actually proceeding with request
        processing.

        This examines the appropriate request headers for conditionals,
        the existing response headers and sets the response code as necessary.

        @return: True if you should write a body, False if you should
                 not.
        """
        tags = self.getReqHeader("if-none-match")
        etag = self.getRespHeader("etag")
        if tags:
            if (etag in tags) or ('*' in tags):
                if self.method in ("HEAD", "GET"):
                    self.setResponseCode(responsecode.NOT_MODIFIED)
                else:
                    self.setResponseCode(responsecode.PRECONDITION_FAILED)
                return False

        modified_since = self.getReqHeader('if-modified-since')
        # setLastModified() records the time in the Last-Modified response
        # header, so read it back from there.
        # NOTE(review): assumes both accessors return comparable timestamps.
        last_modified = self.getRespHeader('Last-Modified')
        if modified_since and last_modified:
            if modified_since >= last_modified:
                self.setResponseCode(responsecode.NOT_MODIFIED)
                return False

        # if this is a "HEAD" request, we shouldn't return any data
        if self.method == "HEAD":
            return False

        return True

    def getRequestHostname(self):
        """Get the hostname that the user passed in to the request.

        This will either use the Host: header (if it is available) or the
        host we are listening on if the header is unavailable.
        """
        return (self.getReqHeader('host') or
                socket.gethostbyaddr(self.getHost()[1])[0]
                ).split(':')[0]

    def getHost(self):
        """Get my originally requesting transport's host.

        Don't rely on the 'transport' attribute, since Request objects may be
        copied remotely.  For information on this method's return value, see
        twisted.internet.tcp.Port.
        """
        return self.host

    def setHost(self, host, port, ssl=0):
        """Change the host and port the request thinks it's using.

        This method is useful for working with reverse HTTP proxies (e.g.
        both Squid and Apache's mod_proxy can do this), when the address
        the HTTP client is using is different than the one we're listening on.

        For example, Apache may be listening on https://www.example.com, and then
        forwarding requests to http://localhost:8080, but we don't want HTML produced
        by Twisted to say 'http://localhost:8080', they should say 'https://www.example.com',
        so we do::

           request.setHost('www.example.com', 443, ssl=1)

        This method is experimental.
        """
        self._forceSSL = ssl
        self.received_headers["host"] = host
        self.host = address.IPv4Address("TCP", host, port)

    def getClientIP(self):
        """Return the peer's host if it is an IPv4Address, else None."""
        if isinstance(self.client, address.IPv4Address):
            return self.client.host
        else:
            return None

    def isSecure(self):
        """True if SSL was forced via setHost() or the transport is SSL."""
        return self._forceSSL or components.implements(self.channel.transport, interfaces.ISSLTransport)

    def _authorize(self):
        """Populate self.user / self.password from Basic credentials.

        Both are set to the empty string when the header is missing,
        uses another scheme, or cannot be decoded.
        """
        # Authorization, (mostly) per the RFC
        try:
            authh = self.getReqHeaderRaw("Authorization")
            if not authh:
                self.user = self.password = ''
                return

            bas, upw = authh.split()
            if bas.lower() != "basic":
                raise ValueError
            upw = base64.decodestring(upw)
            self.user, self.password = upw.split(':', 1)
        except (binascii.Error, ValueError):
            self.user = self.password = ""
        except Exception:
            # unexpected failure: log it, but still answer "no credentials"
            log.err()
            self.user = self.password = ""

    def getUser(self):
        """Return the Basic-auth user name, parsing the header on first use."""
        try:
            return self.user
        except AttributeError:
            pass
        self._authorize()
        return self.user

    def getPassword(self):
        """Return the Basic-auth password, parsing the header on first use."""
        try:
            return self.password
        except AttributeError:
            pass
        self._authorize()
        return self.password

    def connectionLost(self, reason):
        """connection was lost"""
        pass
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
    """A receiver for HTTP requests. Handles the hop-by-hop behavior."""

    # NOTE(review): headerReceived() stores headers in a plain dict of lists,
    # while splitConnectionHeaders() calls getRawHeader/removeHeader/hasHeader
    # on the same object and uses the http_headers module, which this module
    # does not import. Reconcile the two representations.

    # set in instances or subclasses
    maxHeaderLength = 10240  # maximum length of headers (10KiB)
    requestFactory = Request

    # per-connection parser state; the class-level values are the state of a
    # fresh connection
    length = 0      # body bytes still expected for the current request
    persistent = 1  # read by lineReceived() before the first request is parsed
    _partialheader = ''
    _first_line = 1
    _headerlen = 0
    _savedTimeOut = None

    def __init__(self):
        # raw headers of the request currently being parsed
        self._reqHeaders = {}
        # the request queue
        self.requests = []

    def connectionMade(self):
        self.setTimeout(self.timeOut)

    def lineReceived(self, line):
        """Parse one line of the request head (request line or header)."""
        self.resetTimeout()

        if self._first_line:
            # if this connection is not persistent, drop any data which
            # the client (illegally) sent after the last request.
            if not self.persistent:
                self.dataReceived = self.lineReceived = lambda *args: None
                return

            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self._first_line == 1:
                self._first_line = 2
                return

            self._first_line = 0
            parts = line.split()
            if len(parts) != 3:
                self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
                self.transport.loseConnection()
                return
            self._command, self._path, self._version = parts
        elif line == '':
            # blank line: end of headers
            if self._partialheader:
                self.headerReceived(self._partialheader)
            self._partialheader = ''
            self.allHeadersReceived()
            if self.length == 0:
                self.allContentReceived()
            else:
                self.setRawMode()
        elif line[0] in ' \t':
            # continuation of the previous header line
            self._partialheader = self._partialheader + line
        else:
            if self._partialheader:
                self.headerReceived(self._partialheader)
            self._partialheader = line

    def headerReceived(self, line):
        """Store this header away. Check for too much header data
        (> maxHeaderLength) and abort the connection if so.

        A line without a ':' separator is answered with 400 Bad Request
        and the connection is closed.
        """
        if ':' not in line:
            self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
            self.transport.loseConnection()
            return

        name, val = line.split(':', 1)
        val = val.lstrip(' \t')
        old = self._reqHeaders.get(name, None)
        if old is None:
            old = []
            self._reqHeaders[name] = old
        old.append(val)

        self._headerlen = self._headerlen + len(line)

        if self._headerlen > self.maxHeaderLength:
            self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
            self.transport.loseConnection()

    def allHeadersReceived(self):
        """Create the Request for the parsed head and reset parser state."""
        # set connection variables to
        self.length = 0
        self.persistent = False

        # Split off connection-related headers (this removes them from
        # self._reqHeaders; the returned collection is not used yet)
        self.splitConnectionHeaders()

        # create a new Request object
        request = self.requestFactory(self, len(self.requests), self._command, self._path, self._version, self._reqHeaders)
        self.requests.append(request)

        # Reset header state variables
        del self._command, self._path, self._version
        # assign rather than del: with no headers there is no instance
        # attribute to delete
        self._headerlen = 0
        self._reqHeaders = {}

        self.persistent = self.checkPersistence(request)
        # NOTE(review): assumes getReqHeader yields the length as an int or
        # a numeric string, or None when the header is absent.
        contentLength = request.getReqHeader('Content-Length')
        if contentLength is None:
            self.length = 0
        else:
            self.length = int(contentLength)

    def allContentReceived(self):
        # reset state variables, so we don't interfere with next request
        self.length = 0
        self._first_line = 1

        # Disable the idle timeout, in case this request takes a long
        # time to finish generating output.
        if self.timeOut:
            self._savedTimeOut = self.setTimeout(None)

        req = self.requests[-1]
        req.handleContentComplete()

    def rawDataReceived(self, data):
        """Feed body data to the newest request until self.length is used up."""
        if len(data) < self.length:
            self.requests[-1].handleContentChunk(data)
            self.length = self.length - len(data)
        else:
            self.requests[-1].handleContentChunk(data[:self.length])
            # whatever follows the body belongs to the next request
            extraneous = data[self.length:]
            self.allContentReceived()
            self.setLineMode(extraneous)

    def requestDone(self, request):
        """Called by first request in queue when it is done."""
        if request != self.requests[0]: raise TypeError
        del self.requests[0]

        if self.persistent:
            # notify next request it can start writing
            if self.requests:
                self.requests[0].noLongerQueued()
            else:
                if self._savedTimeOut:
                    self.setTimeout(self._savedTimeOut)
        else:
            self.transport.loseConnection()

    def timeoutConnection(self):
        log.msg("Timing out client: %s" % str(self.transport.getPeer()))
        policies.TimeoutMixin.timeoutConnection(self)

    def connectionLost(self, reason):
        self.setTimeout(None)
        for request in self.requests:
            request.connectionLost(reason)

    def splitConnectionHeaders(self):
        """Move hop-by-hop headers out of the request headers.

        @return: a new header collection holding the connection headers.
        """
        # Split off headers for the connection from headers for the request.
        def move(name):
            h = reqHeaders.getRawHeader(name, None)
            if h is not None:
                reqHeaders.removeHeader(name)
                connHeaders.setRawHeader(name, h)

        connHeaderNames = ['Connection', 'Keep-Alive', 'Proxy-Authenticate', 'Proxy-Authorization', 'TE', 'Trailers', 'Transfer-Encoding', 'Upgrade']
        reqHeaders = self._reqHeaders
        connHeaders = http_headers.Headers()

        move('Connection')
        if connHeaders.hasHeader('Connection'):
            # self._version is the token from the request line, e.g. "HTTP/1.1"
            if self._version != "HTTP/1.1":
                # Remove all headers mentioned in Connection, because a HTTP 1.0
                # proxy might have erroneously forwarded it from a 1.1 client.
                for name in connHeaders.getHeader('Connection'):
                    if reqHeaders.hasHeader(name):
                        reqHeaders.removeHeader(name)
            else:
                # Otherwise, just add the headers listed to the list of those to move
                connHeaderNames.extend(connHeaders.getHeader('Connection'))

        for headername in connHeaderNames:
            move(headername)

        # Content-Length is a both a connection header (defining length of
        # transmission, and a content header (defining length of content).
        h = reqHeaders.getRawHeader('Content-Length', None)
        if h is not None:
            connHeaders.setRawHeader('Content-Length', h)

        return connHeaders

    def checkPersistence(self, request):
        """Check if the channel should close or not.

        @param request: the Request just created for this connection.
        @return: 1 to keep the connection open after the request, 0 to
            close it.
        """
        # HTTP 1.0 persistent connection support is unimplemented:
        # we need a way to disable pipelining. HTTP 1.0 can't do
        # pipelining since we can't know in advance if we'll have a
        # outgoing content-length header. If we don't have the header
        # we need to close the connection. In HTTP 1.1 this is not an
        # issue since we use chunked encoding if content-length is
        # not available.

        # Also, who really cares about extra features for HTTP/1.0; nearly
        # everything supports 1.1 these days, so as long as 1.0 *works*, that's
        # fine.  (Hrm just noticed, Squid only supports HTTP 1.0 so far, so this
        # might be an issue worth thinking about after all)

        # The channel's own _version has been deleted by the time this runs;
        # the request keeps the protocol version as clientproto.
        if request.clientproto == "HTTP/1.1":
            # NOTE(review): splitConnectionHeaders() has already moved
            # 'Connection' out of the request headers, so this lookup
            # probably needs to consult the connection headers instead.
            connection = request.getReqHeader('connection')
            if connection and 'close' in connection:
                request.setHeader('connection', 'close')
                return 0
            else:
                return 1
        else:
            return 0
class HTTPFactory(protocol.ServerFactory):
    """Factory for HTTP server."""

    # NOTE(review): _logDateTimeStart, _logDateTimeStop and _logDateTime are
    # used below but are not defined in the visible part of this module.

    protocol = HTTPChannel

    logPath = None

    timeOut = 60 * 60 * 12

    def __init__(self, logPath=None, timeout=60*60*12):
        """
        @param logPath: path of the access log; when None, startFactory()
            uses log.logfile instead.
        @param timeout: idle timeout in seconds, copied onto each protocol
            built by this factory.
        """
        if logPath is not None:
            logPath = os.path.abspath(logPath)
        self.logPath = logPath
        self.timeOut = timeout

    def buildProtocol(self, addr):
        p = protocol.ServerFactory.buildProtocol(self, addr)
        # timeOut needs to be on the Protocol instance cause
        # TimeoutMixin expects it there
        p.timeOut = self.timeOut
        return p

    def startFactory(self):
        _logDateTimeStart()
        if self.logPath:
            self.logFile = self._openLogFile(self.logPath)
        else:
            self.logFile = log.logfile

    def stopFactory(self):
        if hasattr(self, "logFile"):
            # only close files this factory opened itself
            if self.logFile != log.logfile:
                self.logFile.close()
            del self.logFile
        _logDateTimeStop()

    def _openLogFile(self, path):
        """Override in subclasses, e.g. to use twisted.python.logfile."""
        f = open(path, "a", 1)
        # position at end of file: offset 0 relative to whence=2 (SEEK_END)
        f.seek(0, 2)
        return f

    def log(self, request):
        """Log a request's result to the logfile, by default in combined log format."""
        line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
            request.getClientIP(),
            # request.getUser() or "-", # the remote user is almost never important
            _logDateTime,
            '%s %s %s' % (request.method, request.uri, request.clientproto),
            request.code,
            request.sentLength or "-",
            request.getReqHeader("referer") or "-",
            request.getReqHeader("user-agent") or "-")
        self.logFile.write(line)
# def gotLength(self, length):
# """Called when HTTP channel got length of content in this request.
# This method is not intended for users.
# """
# if length < 100000:
# self.content = StringIO()
# else:
# self.content = tempfile.TemporaryFile()
# def handleContentChunk(self, data):
# """Write a chunk of data.
# This method is not intended for users.
# """
# self.content.write(data)
# # Argument processing
# args = self.args
# ctype = self.getHeader('content-type')
# if self.method == "POST" and ctype:
# mfd = 'multipart/form-data'
# key, pdict = cgi.parse_header(ctype)
# if key == 'application/x-www-form-urlencoded':
# args.update(
# parse_qs(self.content.read(), 1))
# elif key == mfd:
# args.update(
# cgi.parse_multipart(self.content, pdict))
# else:
# pass
# syntax highlighted by Code2HTML, v. 0.9.1