# aiohttp/http_parser.py -- pure-Python HTTP request/response/payload parsers.
  1. import abc
  2. import asyncio
  3. import collections
  4. import re
  5. import string
  6. import zlib
  7. from enum import IntEnum
  8. from typing import Any, List, Optional, Tuple, Type, Union
  9. from multidict import CIMultiDict, CIMultiDictProxy, istr
  10. from yarl import URL
  11. from . import hdrs
  12. from .base_protocol import BaseProtocol
  13. from .helpers import NO_EXTENSIONS, BaseTimerContext
  14. from .http_exceptions import (
  15. BadStatusLine,
  16. ContentEncodingError,
  17. ContentLengthError,
  18. InvalidHeader,
  19. LineTooLong,
  20. TransferEncodingError,
  21. )
  22. from .http_writer import HttpVersion, HttpVersion10
  23. from .log import internal_logger
  24. from .streams import EMPTY_PAYLOAD, StreamReader
  25. from .typedefs import RawHeaders
  26. try:
  27. import brotli
  28. HAS_BROTLI = True
  29. except ImportError: # pragma: no cover
  30. HAS_BROTLI = False
  31. __all__ = (
  32. "HeadersParser",
  33. "HttpParser",
  34. "HttpRequestParser",
  35. "HttpResponseParser",
  36. "RawRequestMessage",
  37. "RawResponseMessage",
  38. )
  39. ASCIISET = set(string.printable)
  40. # See https://tools.ietf.org/html/rfc7230#section-3.1.1
  41. # and https://tools.ietf.org/html/rfc7230#appendix-B
  42. #
  43. # method = token
  44. # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  45. # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  46. # token = 1*tchar
  47. METHRE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
  48. VERSRE = re.compile(r"HTTP/(\d+).(\d+)")
  49. HDRRE = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
  50. RawRequestMessage = collections.namedtuple(
  51. "RawRequestMessage",
  52. [
  53. "method",
  54. "path",
  55. "version",
  56. "headers",
  57. "raw_headers",
  58. "should_close",
  59. "compression",
  60. "upgrade",
  61. "chunked",
  62. "url",
  63. ],
  64. )
  65. RawResponseMessage = collections.namedtuple(
  66. "RawResponseMessage",
  67. [
  68. "version",
  69. "code",
  70. "reason",
  71. "headers",
  72. "raw_headers",
  73. "should_close",
  74. "compression",
  75. "upgrade",
  76. "chunked",
  77. ],
  78. )
  79. class ParseState(IntEnum):
  80. PARSE_NONE = 0
  81. PARSE_LENGTH = 1
  82. PARSE_CHUNKED = 2
  83. PARSE_UNTIL_EOF = 3
  84. class ChunkState(IntEnum):
  85. PARSE_CHUNKED_SIZE = 0
  86. PARSE_CHUNKED_CHUNK = 1
  87. PARSE_CHUNKED_CHUNK_EOF = 2
  88. PARSE_MAYBE_TRAILERS = 3
  89. PARSE_TRAILERS = 4
  90. class HeadersParser:
  91. def __init__(
  92. self,
  93. max_line_size: int = 8190,
  94. max_headers: int = 32768,
  95. max_field_size: int = 8190,
  96. ) -> None:
  97. self.max_line_size = max_line_size
  98. self.max_headers = max_headers
  99. self.max_field_size = max_field_size
  100. def parse_headers(
  101. self, lines: List[bytes]
  102. ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
  103. headers = CIMultiDict() # type: CIMultiDict[str]
  104. raw_headers = []
  105. lines_idx = 1
  106. line = lines[1]
  107. line_count = len(lines)
  108. while line:
  109. # Parse initial header name : value pair.
  110. try:
  111. bname, bvalue = line.split(b":", 1)
  112. except ValueError:
  113. raise InvalidHeader(line) from None
  114. bname = bname.strip(b" \t")
  115. bvalue = bvalue.lstrip()
  116. if HDRRE.search(bname):
  117. raise InvalidHeader(bname)
  118. if len(bname) > self.max_field_size:
  119. raise LineTooLong(
  120. "request header name {}".format(
  121. bname.decode("utf8", "xmlcharrefreplace")
  122. ),
  123. str(self.max_field_size),
  124. str(len(bname)),
  125. )
  126. header_length = len(bvalue)
  127. # next line
  128. lines_idx += 1
  129. line = lines[lines_idx]
  130. # consume continuation lines
  131. continuation = line and line[0] in (32, 9) # (' ', '\t')
  132. if continuation:
  133. bvalue_lst = [bvalue]
  134. while continuation:
  135. header_length += len(line)
  136. if header_length > self.max_field_size:
  137. raise LineTooLong(
  138. "request header field {}".format(
  139. bname.decode("utf8", "xmlcharrefreplace")
  140. ),
  141. str(self.max_field_size),
  142. str(header_length),
  143. )
  144. bvalue_lst.append(line)
  145. # next line
  146. lines_idx += 1
  147. if lines_idx < line_count:
  148. line = lines[lines_idx]
  149. if line:
  150. continuation = line[0] in (32, 9) # (' ', '\t')
  151. else:
  152. line = b""
  153. break
  154. bvalue = b"".join(bvalue_lst)
  155. else:
  156. if header_length > self.max_field_size:
  157. raise LineTooLong(
  158. "request header field {}".format(
  159. bname.decode("utf8", "xmlcharrefreplace")
  160. ),
  161. str(self.max_field_size),
  162. str(header_length),
  163. )
  164. bvalue = bvalue.strip()
  165. name = bname.decode("utf-8", "surrogateescape")
  166. value = bvalue.decode("utf-8", "surrogateescape")
  167. headers.add(name, value)
  168. raw_headers.append((bname, bvalue))
  169. return (CIMultiDictProxy(headers), tuple(raw_headers))
  170. class HttpParser(abc.ABC):
  171. def __init__(
  172. self,
  173. protocol: Optional[BaseProtocol] = None,
  174. loop: Optional[asyncio.AbstractEventLoop] = None,
  175. limit: int = 2 ** 16,
  176. max_line_size: int = 8190,
  177. max_headers: int = 32768,
  178. max_field_size: int = 8190,
  179. timer: Optional[BaseTimerContext] = None,
  180. code: Optional[int] = None,
  181. method: Optional[str] = None,
  182. readall: bool = False,
  183. payload_exception: Optional[Type[BaseException]] = None,
  184. response_with_body: bool = True,
  185. read_until_eof: bool = False,
  186. auto_decompress: bool = True,
  187. ) -> None:
  188. self.protocol = protocol
  189. self.loop = loop
  190. self.max_line_size = max_line_size
  191. self.max_headers = max_headers
  192. self.max_field_size = max_field_size
  193. self.timer = timer
  194. self.code = code
  195. self.method = method
  196. self.readall = readall
  197. self.payload_exception = payload_exception
  198. self.response_with_body = response_with_body
  199. self.read_until_eof = read_until_eof
  200. self._lines = [] # type: List[bytes]
  201. self._tail = b""
  202. self._upgraded = False
  203. self._payload = None
  204. self._payload_parser = None # type: Optional[HttpPayloadParser]
  205. self._auto_decompress = auto_decompress
  206. self._limit = limit
  207. self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)
  208. @abc.abstractmethod
  209. def parse_message(self, lines: List[bytes]) -> Any:
  210. pass
  211. def feed_eof(self) -> Any:
  212. if self._payload_parser is not None:
  213. self._payload_parser.feed_eof()
  214. self._payload_parser = None
  215. else:
  216. # try to extract partial message
  217. if self._tail:
  218. self._lines.append(self._tail)
  219. if self._lines:
  220. if self._lines[-1] != "\r\n":
  221. self._lines.append(b"")
  222. try:
  223. return self.parse_message(self._lines)
  224. except Exception:
  225. return None
  226. def feed_data(
  227. self,
  228. data: bytes,
  229. SEP: bytes = b"\r\n",
  230. EMPTY: bytes = b"",
  231. CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
  232. METH_CONNECT: str = hdrs.METH_CONNECT,
  233. SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
  234. ) -> Tuple[List[Any], bool, bytes]:
  235. messages = []
  236. if self._tail:
  237. data, self._tail = self._tail + data, b""
  238. data_len = len(data)
  239. start_pos = 0
  240. loop = self.loop
  241. while start_pos < data_len:
  242. # read HTTP message (request/response line + headers), \r\n\r\n
  243. # and split by lines
  244. if self._payload_parser is None and not self._upgraded:
  245. pos = data.find(SEP, start_pos)
  246. # consume \r\n
  247. if pos == start_pos and not self._lines:
  248. start_pos = pos + 2
  249. continue
  250. if pos >= start_pos:
  251. # line found
  252. self._lines.append(data[start_pos:pos])
  253. start_pos = pos + 2
  254. # \r\n\r\n found
  255. if self._lines[-1] == EMPTY:
  256. try:
  257. msg = self.parse_message(self._lines)
  258. finally:
  259. self._lines.clear()
  260. # payload length
  261. length = msg.headers.get(CONTENT_LENGTH)
  262. if length is not None:
  263. try:
  264. length = int(length)
  265. except ValueError:
  266. raise InvalidHeader(CONTENT_LENGTH)
  267. if length < 0:
  268. raise InvalidHeader(CONTENT_LENGTH)
  269. # do not support old websocket spec
  270. if SEC_WEBSOCKET_KEY1 in msg.headers:
  271. raise InvalidHeader(SEC_WEBSOCKET_KEY1)
  272. self._upgraded = msg.upgrade
  273. method = getattr(msg, "method", self.method)
  274. assert self.protocol is not None
  275. # calculate payload
  276. if (
  277. (length is not None and length > 0)
  278. or msg.chunked
  279. and not msg.upgrade
  280. ):
  281. payload = StreamReader(
  282. self.protocol,
  283. timer=self.timer,
  284. loop=loop,
  285. limit=self._limit,
  286. )
  287. payload_parser = HttpPayloadParser(
  288. payload,
  289. length=length,
  290. chunked=msg.chunked,
  291. method=method,
  292. compression=msg.compression,
  293. code=self.code,
  294. readall=self.readall,
  295. response_with_body=self.response_with_body,
  296. auto_decompress=self._auto_decompress,
  297. )
  298. if not payload_parser.done:
  299. self._payload_parser = payload_parser
  300. elif method == METH_CONNECT:
  301. payload = StreamReader(
  302. self.protocol,
  303. timer=self.timer,
  304. loop=loop,
  305. limit=self._limit,
  306. )
  307. self._upgraded = True
  308. self._payload_parser = HttpPayloadParser(
  309. payload,
  310. method=msg.method,
  311. compression=msg.compression,
  312. readall=True,
  313. auto_decompress=self._auto_decompress,
  314. )
  315. else:
  316. if (
  317. getattr(msg, "code", 100) >= 199
  318. and length is None
  319. and self.read_until_eof
  320. ):
  321. payload = StreamReader(
  322. self.protocol,
  323. timer=self.timer,
  324. loop=loop,
  325. limit=self._limit,
  326. )
  327. payload_parser = HttpPayloadParser(
  328. payload,
  329. length=length,
  330. chunked=msg.chunked,
  331. method=method,
  332. compression=msg.compression,
  333. code=self.code,
  334. readall=True,
  335. response_with_body=self.response_with_body,
  336. auto_decompress=self._auto_decompress,
  337. )
  338. if not payload_parser.done:
  339. self._payload_parser = payload_parser
  340. else:
  341. payload = EMPTY_PAYLOAD # type: ignore
  342. messages.append((msg, payload))
  343. else:
  344. self._tail = data[start_pos:]
  345. data = EMPTY
  346. break
  347. # no parser, just store
  348. elif self._payload_parser is None and self._upgraded:
  349. assert not self._lines
  350. break
  351. # feed payload
  352. elif data and start_pos < data_len:
  353. assert not self._lines
  354. assert self._payload_parser is not None
  355. try:
  356. eof, data = self._payload_parser.feed_data(data[start_pos:])
  357. except BaseException as exc:
  358. if self.payload_exception is not None:
  359. self._payload_parser.payload.set_exception(
  360. self.payload_exception(str(exc))
  361. )
  362. else:
  363. self._payload_parser.payload.set_exception(exc)
  364. eof = True
  365. data = b""
  366. if eof:
  367. start_pos = 0
  368. data_len = len(data)
  369. self._payload_parser = None
  370. continue
  371. else:
  372. break
  373. if data and start_pos < data_len:
  374. data = data[start_pos:]
  375. else:
  376. data = EMPTY
  377. return messages, self._upgraded, data
  378. def parse_headers(
  379. self, lines: List[bytes]
  380. ) -> Tuple[
  381. "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
  382. ]:
  383. """Parses RFC 5322 headers from a stream.
  384. Line continuations are supported. Returns list of header name
  385. and value pairs. Header name is in upper case.
  386. """
  387. headers, raw_headers = self._headers_parser.parse_headers(lines)
  388. close_conn = None
  389. encoding = None
  390. upgrade = False
  391. chunked = False
  392. # keep-alive
  393. conn = headers.get(hdrs.CONNECTION)
  394. if conn:
  395. v = conn.lower()
  396. if v == "close":
  397. close_conn = True
  398. elif v == "keep-alive":
  399. close_conn = False
  400. elif v == "upgrade":
  401. upgrade = True
  402. # encoding
  403. enc = headers.get(hdrs.CONTENT_ENCODING)
  404. if enc:
  405. enc = enc.lower()
  406. if enc in ("gzip", "deflate", "br"):
  407. encoding = enc
  408. # chunking
  409. te = headers.get(hdrs.TRANSFER_ENCODING)
  410. if te and "chunked" in te.lower():
  411. chunked = True
  412. return (headers, raw_headers, close_conn, encoding, upgrade, chunked)
  413. def set_upgraded(self, val: bool) -> None:
  414. """Set connection upgraded (to websocket) mode.
  415. :param bool val: new state.
  416. """
  417. self._upgraded = val
  418. class HttpRequestParser(HttpParser):
  419. """Read request status line. Exception .http_exceptions.BadStatusLine
  420. could be raised in case of any errors in status line.
  421. Returns RawRequestMessage.
  422. """
  423. def parse_message(self, lines: List[bytes]) -> Any:
  424. # request line
  425. line = lines[0].decode("utf-8", "surrogateescape")
  426. try:
  427. method, path, version = line.split(None, 2)
  428. except ValueError:
  429. raise BadStatusLine(line) from None
  430. if len(path) > self.max_line_size:
  431. raise LineTooLong(
  432. "Status line is too long", str(self.max_line_size), str(len(path))
  433. )
  434. path_part, _hash_separator, url_fragment = path.partition("#")
  435. path_part, _question_mark_separator, qs_part = path_part.partition("?")
  436. # method
  437. if not METHRE.match(method):
  438. raise BadStatusLine(method)
  439. # version
  440. try:
  441. if version.startswith("HTTP/"):
  442. n1, n2 = version[5:].split(".", 1)
  443. version_o = HttpVersion(int(n1), int(n2))
  444. else:
  445. raise BadStatusLine(version)
  446. except Exception:
  447. raise BadStatusLine(version)
  448. # read headers
  449. (
  450. headers,
  451. raw_headers,
  452. close,
  453. compression,
  454. upgrade,
  455. chunked,
  456. ) = self.parse_headers(lines)
  457. if close is None: # then the headers weren't set in the request
  458. if version_o <= HttpVersion10: # HTTP 1.0 must asks to not close
  459. close = True
  460. else: # HTTP 1.1 must ask to close.
  461. close = False
  462. return RawRequestMessage(
  463. method,
  464. path,
  465. version_o,
  466. headers,
  467. raw_headers,
  468. close,
  469. compression,
  470. upgrade,
  471. chunked,
  472. # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
  473. # NOTE: parser does, otherwise it results into the same
  474. # NOTE: HTTP Request-Line input producing different
  475. # NOTE: `yarl.URL()` objects
  476. URL.build(
  477. path=path_part,
  478. query_string=qs_part,
  479. fragment=url_fragment,
  480. encoded=True,
  481. ),
  482. )
  483. class HttpResponseParser(HttpParser):
  484. """Read response status line and headers.
  485. BadStatusLine could be raised in case of any errors in status line.
  486. Returns RawResponseMessage"""
  487. def parse_message(self, lines: List[bytes]) -> Any:
  488. line = lines[0].decode("utf-8", "surrogateescape")
  489. try:
  490. version, status = line.split(None, 1)
  491. except ValueError:
  492. raise BadStatusLine(line) from None
  493. try:
  494. status, reason = status.split(None, 1)
  495. except ValueError:
  496. reason = ""
  497. if len(reason) > self.max_line_size:
  498. raise LineTooLong(
  499. "Status line is too long", str(self.max_line_size), str(len(reason))
  500. )
  501. # version
  502. match = VERSRE.match(version)
  503. if match is None:
  504. raise BadStatusLine(line)
  505. version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
  506. # The status code is a three-digit number
  507. try:
  508. status_i = int(status)
  509. except ValueError:
  510. raise BadStatusLine(line) from None
  511. if status_i > 999:
  512. raise BadStatusLine(line)
  513. # read headers
  514. (
  515. headers,
  516. raw_headers,
  517. close,
  518. compression,
  519. upgrade,
  520. chunked,
  521. ) = self.parse_headers(lines)
  522. if close is None:
  523. close = version_o <= HttpVersion10
  524. return RawResponseMessage(
  525. version_o,
  526. status_i,
  527. reason.strip(),
  528. headers,
  529. raw_headers,
  530. close,
  531. compression,
  532. upgrade,
  533. chunked,
  534. )
  535. class HttpPayloadParser:
  536. def __init__(
  537. self,
  538. payload: StreamReader,
  539. length: Optional[int] = None,
  540. chunked: bool = False,
  541. compression: Optional[str] = None,
  542. code: Optional[int] = None,
  543. method: Optional[str] = None,
  544. readall: bool = False,
  545. response_with_body: bool = True,
  546. auto_decompress: bool = True,
  547. ) -> None:
  548. self._length = 0
  549. self._type = ParseState.PARSE_NONE
  550. self._chunk = ChunkState.PARSE_CHUNKED_SIZE
  551. self._chunk_size = 0
  552. self._chunk_tail = b""
  553. self._auto_decompress = auto_decompress
  554. self.done = False
  555. # payload decompression wrapper
  556. if response_with_body and compression and self._auto_decompress:
  557. real_payload = DeflateBuffer(
  558. payload, compression
  559. ) # type: Union[StreamReader, DeflateBuffer]
  560. else:
  561. real_payload = payload
  562. # payload parser
  563. if not response_with_body:
  564. # don't parse payload if it's not expected to be received
  565. self._type = ParseState.PARSE_NONE
  566. real_payload.feed_eof()
  567. self.done = True
  568. elif chunked:
  569. self._type = ParseState.PARSE_CHUNKED
  570. elif length is not None:
  571. self._type = ParseState.PARSE_LENGTH
  572. self._length = length
  573. if self._length == 0:
  574. real_payload.feed_eof()
  575. self.done = True
  576. else:
  577. if readall and code != 204:
  578. self._type = ParseState.PARSE_UNTIL_EOF
  579. elif method in ("PUT", "POST"):
  580. internal_logger.warning( # pragma: no cover
  581. "Content-Length or Transfer-Encoding header is required"
  582. )
  583. self._type = ParseState.PARSE_NONE
  584. real_payload.feed_eof()
  585. self.done = True
  586. self.payload = real_payload
  587. def feed_eof(self) -> None:
  588. if self._type == ParseState.PARSE_UNTIL_EOF:
  589. self.payload.feed_eof()
  590. elif self._type == ParseState.PARSE_LENGTH:
  591. raise ContentLengthError(
  592. "Not enough data for satisfy content length header."
  593. )
  594. elif self._type == ParseState.PARSE_CHUNKED:
  595. raise TransferEncodingError(
  596. "Not enough data for satisfy transfer length header."
  597. )
  598. def feed_data(
  599. self, chunk: bytes, SEP: bytes = b"\r\n", CHUNK_EXT: bytes = b";"
  600. ) -> Tuple[bool, bytes]:
  601. # Read specified amount of bytes
  602. if self._type == ParseState.PARSE_LENGTH:
  603. required = self._length
  604. chunk_len = len(chunk)
  605. if required >= chunk_len:
  606. self._length = required - chunk_len
  607. self.payload.feed_data(chunk, chunk_len)
  608. if self._length == 0:
  609. self.payload.feed_eof()
  610. return True, b""
  611. else:
  612. self._length = 0
  613. self.payload.feed_data(chunk[:required], required)
  614. self.payload.feed_eof()
  615. return True, chunk[required:]
  616. # Chunked transfer encoding parser
  617. elif self._type == ParseState.PARSE_CHUNKED:
  618. if self._chunk_tail:
  619. chunk = self._chunk_tail + chunk
  620. self._chunk_tail = b""
  621. while chunk:
  622. # read next chunk size
  623. if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
  624. pos = chunk.find(SEP)
  625. if pos >= 0:
  626. i = chunk.find(CHUNK_EXT, 0, pos)
  627. if i >= 0:
  628. size_b = chunk[:i] # strip chunk-extensions
  629. else:
  630. size_b = chunk[:pos]
  631. try:
  632. size = int(bytes(size_b), 16)
  633. except ValueError:
  634. exc = TransferEncodingError(
  635. chunk[:pos].decode("ascii", "surrogateescape")
  636. )
  637. self.payload.set_exception(exc)
  638. raise exc from None
  639. chunk = chunk[pos + 2 :]
  640. if size == 0: # eof marker
  641. self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
  642. else:
  643. self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
  644. self._chunk_size = size
  645. self.payload.begin_http_chunk_receiving()
  646. else:
  647. self._chunk_tail = chunk
  648. return False, b""
  649. # read chunk and feed buffer
  650. if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
  651. required = self._chunk_size
  652. chunk_len = len(chunk)
  653. if required > chunk_len:
  654. self._chunk_size = required - chunk_len
  655. self.payload.feed_data(chunk, chunk_len)
  656. return False, b""
  657. else:
  658. self._chunk_size = 0
  659. self.payload.feed_data(chunk[:required], required)
  660. chunk = chunk[required:]
  661. self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
  662. self.payload.end_http_chunk_receiving()
  663. # toss the CRLF at the end of the chunk
  664. if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
  665. if chunk[:2] == SEP:
  666. chunk = chunk[2:]
  667. self._chunk = ChunkState.PARSE_CHUNKED_SIZE
  668. else:
  669. self._chunk_tail = chunk
  670. return False, b""
  671. # if stream does not contain trailer, after 0\r\n
  672. # we should get another \r\n otherwise
  673. # trailers needs to be skiped until \r\n\r\n
  674. if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
  675. head = chunk[:2]
  676. if head == SEP:
  677. # end of stream
  678. self.payload.feed_eof()
  679. return True, chunk[2:]
  680. # Both CR and LF, or only LF may not be received yet. It is
  681. # expected that CRLF or LF will be shown at the very first
  682. # byte next time, otherwise trailers should come. The last
  683. # CRLF which marks the end of response might not be
  684. # contained in the same TCP segment which delivered the
  685. # size indicator.
  686. if not head:
  687. return False, b""
  688. if head == SEP[:1]:
  689. self._chunk_tail = head
  690. return False, b""
  691. self._chunk = ChunkState.PARSE_TRAILERS
  692. # read and discard trailer up to the CRLF terminator
  693. if self._chunk == ChunkState.PARSE_TRAILERS:
  694. pos = chunk.find(SEP)
  695. if pos >= 0:
  696. chunk = chunk[pos + 2 :]
  697. self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
  698. else:
  699. self._chunk_tail = chunk
  700. return False, b""
  701. # Read all bytes until eof
  702. elif self._type == ParseState.PARSE_UNTIL_EOF:
  703. self.payload.feed_data(chunk, len(chunk))
  704. return False, b""
  705. class DeflateBuffer:
  706. """DeflateStream decompress stream and feed data into specified stream."""
  707. def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
  708. self.out = out
  709. self.size = 0
  710. self.encoding = encoding
  711. self._started_decoding = False
  712. if encoding == "br":
  713. if not HAS_BROTLI: # pragma: no cover
  714. raise ContentEncodingError(
  715. "Can not decode content-encoding: brotli (br). "
  716. "Please install `brotlipy`"
  717. )
  718. self.decompressor = brotli.Decompressor()
  719. else:
  720. zlib_mode = 16 + zlib.MAX_WBITS if encoding == "gzip" else zlib.MAX_WBITS
  721. self.decompressor = zlib.decompressobj(wbits=zlib_mode)
  722. def set_exception(self, exc: BaseException) -> None:
  723. self.out.set_exception(exc)
  724. def feed_data(self, chunk: bytes, size: int) -> None:
  725. if not size:
  726. return
  727. self.size += size
  728. # RFC1950
  729. # bits 0..3 = CM = 0b1000 = 8 = "deflate"
  730. # bits 4..7 = CINFO = 1..7 = windows size.
  731. if (
  732. not self._started_decoding
  733. and self.encoding == "deflate"
  734. and chunk[0] & 0xF != 8
  735. ):
  736. # Change the decoder to decompress incorrectly compressed data
  737. # Actually we should issue a warning about non-RFC-compliant data.
  738. self.decompressor = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
  739. try:
  740. chunk = self.decompressor.decompress(chunk)
  741. except Exception:
  742. raise ContentEncodingError(
  743. "Can not decode content-encoding: %s" % self.encoding
  744. )
  745. self._started_decoding = True
  746. if chunk:
  747. self.out.feed_data(chunk, len(chunk))
  748. def feed_eof(self) -> None:
  749. chunk = self.decompressor.flush()
  750. if chunk or self.size > 0:
  751. self.out.feed_data(chunk, len(chunk))
  752. if self.encoding == "deflate" and not self.decompressor.eof:
  753. raise ContentEncodingError("deflate")
  754. self.out.feed_eof()
  755. def begin_http_chunk_receiving(self) -> None:
  756. self.out.begin_http_chunk_receiving()
  757. def end_http_chunk_receiving(self) -> None:
  758. self.out.end_http_chunk_receiving()
  759. HttpRequestParserPy = HttpRequestParser
  760. HttpResponseParserPy = HttpResponseParser
  761. RawRequestMessagePy = RawRequestMessage
  762. RawResponseMessagePy = RawResponseMessage
  763. try:
  764. if not NO_EXTENSIONS:
  765. from ._http_parser import ( # type: ignore
  766. HttpRequestParser,
  767. HttpResponseParser,
  768. RawRequestMessage,
  769. RawResponseMessage,
  770. )
  771. HttpRequestParserC = HttpRequestParser
  772. HttpResponseParserC = HttpResponseParser
  773. RawRequestMessageC = RawRequestMessage
  774. RawResponseMessageC = RawResponseMessage
  775. except ImportError: # pragma: no cover
  776. pass