requests的高级功能-超时时间

requests的高级功能


如果只是简单地使用requests,会发现requests.get等函数使用的是默认参数的HTTPAdapter,因此所有由HTTPAdapter初始化参数指定的功能都没有办法使用,例如:重试次数(max_retries)、缓存的连接池数量(pool_connections)、每个连接池的最大连接数(pool_maxsize)、连接池满时是否阻塞(pool_block)等。当然,因为requests.get方式只会发起一次HTTP请求,所以连接池相关的参数都没有设置的必要。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# requests.sessions.Session
class Session(SessionRedirectMixin):
def __init__(self):
....
self.adapters = OrderedDict()
self.mount('https://', HTTPAdapter())
self.mount('http://', HTTPAdapter())
# requests.adapters.HTTPAdapter
class HTTPAdapter(BaseAdapter):
def __init__(self, pool_connections=DEFAULT_POOLSIZE,
pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
pool_block=DEFAULT_POOLBLOCK):

设置超时时间

超时时间可以通过timeout参数指定:既可以是单个数值(同时作为连接超时和读取超时),也可以细分为(connect_timeout, read_timeout)元组。

1
2
3
4
5
"""
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
"""

通过流程图可以看到,传递的timeout参数一直进入到HTTPAdapter.send内。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# requests.adapters.HTTPAdapter
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
...
if isinstance(timeout, tuple):
try:
connect, read = timeout
# timeout被实例化为TimeoutSauce对象(即urllib3的Timeout)
timeout = TimeoutSauce(connect=connect, read=read)
except ValueError as e:
# this may raise a string formatting error.
err = ("Invalid timeout {0}. Pass a (connect, read) "
"timeout tuple, or a single float to set "
"both timeouts to the same value".format(timeout))
raise ValueError(err)
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
if not chunked:
resp = conn.urlopen(
method=request.method,
url=url,
body=request.body,
headers=request.headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
timeout=timeout # 传入HTTPConnectionPool.urlopen
)
# requests.packages.urllib3.util.timeout.py
class Timeout(object):
def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
self._start_connect = None
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: Elapsed time.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
"""
if self._start_connect is None:
raise TimeoutStateError("Can't get connect duration for timer "
"that has not started.")
return current_time() - self._start_connect
@property
def read_timeout(self):
if (self.total is not None and
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
# 如果同时指定了total和read,则read_timeout取「total减去连接耗时后的剩余时间」与read中的较小值(只指定total的情况见下面的elif分支)
return max(0, min(self.total - self.get_connect_duration(),
self._read))
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
return max(0, self.total - self.get_connect_duration())
else:
return self._read

实例化后的timeout随后被传递给HTTPConnectionPool,其中的connect_timeout被设置为conn.timeout,并一直传递到socket中,最终通过socket.settimeout生效。需要注意的是,socket是先通过settimeout设置好超时时间,之后才执行bind -> connect操作的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# lib/socket.py
_GLOBAL_DEFAULT_TIMEOUT = object()
def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
host, port = address
err = None
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket(af, socktype, proto)
if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
return sock
except error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise error("getaddrinfo returns an empty list")

其中的read_timeout在HTTPConnectionPool中设置。通过代码可以看到,socket.settimeout设置的是socket后续所有操作的超时时间,在不同的阶段调用该函数,就设置了接下来操作的超时时间:settimeout -> bind -> connect -> settimeout -> read。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# requests.packages.urllib3.connectionpool.py
class HTTPConnectionPool
def _make_request(...):
....
read_timeout = timeout_obj.read_timeout
# App Engine doesn't have a sock attr
if getattr(conn, 'sock', None):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
# the exception and assuming all BadStatusLine exceptions are read
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
else: # None or a value
# 同样通过settimeout函数设置,不过此时已经connect完了,接下来就只有read操作
conn.sock.settimeout(read_timeout)