_http_parser.pyx 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
  1. #cython: language_level=3
  2. #
  3. # Based on https://github.com/MagicStack/httptools
  4. #
  5. from __future__ import absolute_import, print_function
  6. from cpython cimport (
  7. Py_buffer,
  8. PyBUF_SIMPLE,
  9. PyBuffer_Release,
  10. PyBytes_AsString,
  11. PyBytes_AsStringAndSize,
  12. PyObject_GetBuffer,
  13. )
  14. from cpython.mem cimport PyMem_Free, PyMem_Malloc
  15. from libc.limits cimport ULLONG_MAX
  16. from libc.string cimport memcpy
  17. from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy
  18. from yarl import URL as _URL
  19. from aiohttp import hdrs
  20. from .http_exceptions import (
  21. BadHttpMessage,
  22. BadStatusLine,
  23. ContentLengthError,
  24. InvalidHeader,
  25. InvalidURLError,
  26. LineTooLong,
  27. PayloadEncodingError,
  28. TransferEncodingError,
  29. )
  30. from .http_parser import DeflateBuffer as _DeflateBuffer
  31. from .http_writer import (
  32. HttpVersion as _HttpVersion,
  33. HttpVersion10 as _HttpVersion10,
  34. HttpVersion11 as _HttpVersion11,
  35. )
  36. from .streams import EMPTY_PAYLOAD as _EMPTY_PAYLOAD, StreamReader as _StreamReader
  37. cimport cython
  38. from aiohttp cimport _cparser as cparser
  39. include "_headers.pxi"
  40. from aiohttp cimport _find_header
  41. DEF DEFAULT_FREELIST_SIZE = 250
  42. cdef extern from "Python.h":
  43. int PyByteArray_Resize(object, Py_ssize_t) except -1
  44. Py_ssize_t PyByteArray_Size(object) except -1
  45. char* PyByteArray_AsString(object)
  46. __all__ = ('HttpRequestParser', 'HttpResponseParser',
  47. 'RawRequestMessage', 'RawResponseMessage')
  48. cdef object URL = _URL
  49. cdef object URL_build = URL.build
  50. cdef object CIMultiDict = _CIMultiDict
  51. cdef object CIMultiDictProxy = _CIMultiDictProxy
  52. cdef object HttpVersion = _HttpVersion
  53. cdef object HttpVersion10 = _HttpVersion10
  54. cdef object HttpVersion11 = _HttpVersion11
  55. cdef object SEC_WEBSOCKET_KEY1 = hdrs.SEC_WEBSOCKET_KEY1
  56. cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING
  57. cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD
  58. cdef object StreamReader = _StreamReader
  59. cdef object DeflateBuffer = _DeflateBuffer
  60. cdef inline object extend(object buf, const char* at, size_t length):
  61. cdef Py_ssize_t s
  62. cdef char* ptr
  63. s = PyByteArray_Size(buf)
  64. PyByteArray_Resize(buf, s + length)
  65. ptr = PyByteArray_AsString(buf)
  66. memcpy(ptr + s, at, length)
  67. DEF METHODS_COUNT = 34;
  68. cdef list _http_method = []
  69. for i in range(METHODS_COUNT):
  70. _http_method.append(
  71. cparser.http_method_str(<cparser.http_method> i).decode('ascii'))
  72. cdef inline str http_method_str(int i):
  73. if i < METHODS_COUNT:
  74. return <str>_http_method[i]
  75. else:
  76. return "<unknown>"
  77. cdef inline object find_header(bytes raw_header):
  78. cdef Py_ssize_t size
  79. cdef char *buf
  80. cdef int idx
  81. PyBytes_AsStringAndSize(raw_header, &buf, &size)
  82. idx = _find_header.find_header(buf, size)
  83. if idx == -1:
  84. return raw_header.decode('utf-8', 'surrogateescape')
  85. return headers[idx]
  86. @cython.freelist(DEFAULT_FREELIST_SIZE)
  87. cdef class RawRequestMessage:
  88. cdef readonly str method
  89. cdef readonly str path
  90. cdef readonly object version # HttpVersion
  91. cdef readonly object headers # CIMultiDict
  92. cdef readonly object raw_headers # tuple
  93. cdef readonly object should_close
  94. cdef readonly object compression
  95. cdef readonly object upgrade
  96. cdef readonly object chunked
  97. cdef readonly object url # yarl.URL
  98. def __init__(self, method, path, version, headers, raw_headers,
  99. should_close, compression, upgrade, chunked, url):
  100. self.method = method
  101. self.path = path
  102. self.version = version
  103. self.headers = headers
  104. self.raw_headers = raw_headers
  105. self.should_close = should_close
  106. self.compression = compression
  107. self.upgrade = upgrade
  108. self.chunked = chunked
  109. self.url = url
  110. def __repr__(self):
  111. info = []
  112. info.append(("method", self.method))
  113. info.append(("path", self.path))
  114. info.append(("version", self.version))
  115. info.append(("headers", self.headers))
  116. info.append(("raw_headers", self.raw_headers))
  117. info.append(("should_close", self.should_close))
  118. info.append(("compression", self.compression))
  119. info.append(("upgrade", self.upgrade))
  120. info.append(("chunked", self.chunked))
  121. info.append(("url", self.url))
  122. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  123. return '<RawRequestMessage(' + sinfo + ')>'
  124. def _replace(self, **dct):
  125. cdef RawRequestMessage ret
  126. ret = _new_request_message(self.method,
  127. self.path,
  128. self.version,
  129. self.headers,
  130. self.raw_headers,
  131. self.should_close,
  132. self.compression,
  133. self.upgrade,
  134. self.chunked,
  135. self.url)
  136. if "method" in dct:
  137. ret.method = dct["method"]
  138. if "path" in dct:
  139. ret.path = dct["path"]
  140. if "version" in dct:
  141. ret.version = dct["version"]
  142. if "headers" in dct:
  143. ret.headers = dct["headers"]
  144. if "raw_headers" in dct:
  145. ret.raw_headers = dct["raw_headers"]
  146. if "should_close" in dct:
  147. ret.should_close = dct["should_close"]
  148. if "compression" in dct:
  149. ret.compression = dct["compression"]
  150. if "upgrade" in dct:
  151. ret.upgrade = dct["upgrade"]
  152. if "chunked" in dct:
  153. ret.chunked = dct["chunked"]
  154. if "url" in dct:
  155. ret.url = dct["url"]
  156. return ret
  157. cdef _new_request_message(str method,
  158. str path,
  159. object version,
  160. object headers,
  161. object raw_headers,
  162. bint should_close,
  163. object compression,
  164. bint upgrade,
  165. bint chunked,
  166. object url):
  167. cdef RawRequestMessage ret
  168. ret = RawRequestMessage.__new__(RawRequestMessage)
  169. ret.method = method
  170. ret.path = path
  171. ret.version = version
  172. ret.headers = headers
  173. ret.raw_headers = raw_headers
  174. ret.should_close = should_close
  175. ret.compression = compression
  176. ret.upgrade = upgrade
  177. ret.chunked = chunked
  178. ret.url = url
  179. return ret
  180. @cython.freelist(DEFAULT_FREELIST_SIZE)
  181. cdef class RawResponseMessage:
  182. cdef readonly object version # HttpVersion
  183. cdef readonly int code
  184. cdef readonly str reason
  185. cdef readonly object headers # CIMultiDict
  186. cdef readonly object raw_headers # tuple
  187. cdef readonly object should_close
  188. cdef readonly object compression
  189. cdef readonly object upgrade
  190. cdef readonly object chunked
  191. def __init__(self, version, code, reason, headers, raw_headers,
  192. should_close, compression, upgrade, chunked):
  193. self.version = version
  194. self.code = code
  195. self.reason = reason
  196. self.headers = headers
  197. self.raw_headers = raw_headers
  198. self.should_close = should_close
  199. self.compression = compression
  200. self.upgrade = upgrade
  201. self.chunked = chunked
  202. def __repr__(self):
  203. info = []
  204. info.append(("version", self.version))
  205. info.append(("code", self.code))
  206. info.append(("reason", self.reason))
  207. info.append(("headers", self.headers))
  208. info.append(("raw_headers", self.raw_headers))
  209. info.append(("should_close", self.should_close))
  210. info.append(("compression", self.compression))
  211. info.append(("upgrade", self.upgrade))
  212. info.append(("chunked", self.chunked))
  213. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  214. return '<RawResponseMessage(' + sinfo + ')>'
  215. cdef _new_response_message(object version,
  216. int code,
  217. str reason,
  218. object headers,
  219. object raw_headers,
  220. bint should_close,
  221. object compression,
  222. bint upgrade,
  223. bint chunked):
  224. cdef RawResponseMessage ret
  225. ret = RawResponseMessage.__new__(RawResponseMessage)
  226. ret.version = version
  227. ret.code = code
  228. ret.reason = reason
  229. ret.headers = headers
  230. ret.raw_headers = raw_headers
  231. ret.should_close = should_close
  232. ret.compression = compression
  233. ret.upgrade = upgrade
  234. ret.chunked = chunked
  235. return ret
  236. @cython.internal
  237. cdef class HttpParser:
  238. cdef:
  239. cparser.http_parser* _cparser
  240. cparser.http_parser_settings* _csettings
  241. bytearray _raw_name
  242. bytearray _raw_value
  243. bint _has_value
  244. object _protocol
  245. object _loop
  246. object _timer
  247. size_t _max_line_size
  248. size_t _max_field_size
  249. size_t _max_headers
  250. bint _response_with_body
  251. bint _read_until_eof
  252. bint _started
  253. object _url
  254. bytearray _buf
  255. str _path
  256. str _reason
  257. object _headers
  258. list _raw_headers
  259. bint _upgraded
  260. list _messages
  261. object _payload
  262. bint _payload_error
  263. object _payload_exception
  264. object _last_error
  265. bint _auto_decompress
  266. int _limit
  267. str _content_encoding
  268. Py_buffer py_buf
  269. def __cinit__(self):
  270. self._cparser = <cparser.http_parser*> \
  271. PyMem_Malloc(sizeof(cparser.http_parser))
  272. if self._cparser is NULL:
  273. raise MemoryError()
  274. self._csettings = <cparser.http_parser_settings*> \
  275. PyMem_Malloc(sizeof(cparser.http_parser_settings))
  276. if self._csettings is NULL:
  277. raise MemoryError()
  278. def __dealloc__(self):
  279. PyMem_Free(self._cparser)
  280. PyMem_Free(self._csettings)
  281. cdef _init(self, cparser.http_parser_type mode,
  282. object protocol, object loop, int limit,
  283. object timer=None,
  284. size_t max_line_size=8190, size_t max_headers=32768,
  285. size_t max_field_size=8190, payload_exception=None,
  286. bint response_with_body=True, bint read_until_eof=False,
  287. bint auto_decompress=True):
  288. cparser.http_parser_init(self._cparser, mode)
  289. self._cparser.data = <void*>self
  290. self._cparser.content_length = 0
  291. cparser.http_parser_settings_init(self._csettings)
  292. self._protocol = protocol
  293. self._loop = loop
  294. self._timer = timer
  295. self._buf = bytearray()
  296. self._payload = None
  297. self._payload_error = 0
  298. self._payload_exception = payload_exception
  299. self._messages = []
  300. self._raw_name = bytearray()
  301. self._raw_value = bytearray()
  302. self._has_value = False
  303. self._max_line_size = max_line_size
  304. self._max_headers = max_headers
  305. self._max_field_size = max_field_size
  306. self._response_with_body = response_with_body
  307. self._read_until_eof = read_until_eof
  308. self._upgraded = False
  309. self._auto_decompress = auto_decompress
  310. self._content_encoding = None
  311. self._csettings.on_url = cb_on_url
  312. self._csettings.on_status = cb_on_status
  313. self._csettings.on_header_field = cb_on_header_field
  314. self._csettings.on_header_value = cb_on_header_value
  315. self._csettings.on_headers_complete = cb_on_headers_complete
  316. self._csettings.on_body = cb_on_body
  317. self._csettings.on_message_begin = cb_on_message_begin
  318. self._csettings.on_message_complete = cb_on_message_complete
  319. self._csettings.on_chunk_header = cb_on_chunk_header
  320. self._csettings.on_chunk_complete = cb_on_chunk_complete
  321. self._last_error = None
  322. self._limit = limit
  323. cdef _process_header(self):
  324. if self._raw_name:
  325. raw_name = bytes(self._raw_name)
  326. raw_value = bytes(self._raw_value)
  327. name = find_header(raw_name)
  328. value = raw_value.decode('utf-8', 'surrogateescape')
  329. self._headers.add(name, value)
  330. if name is CONTENT_ENCODING:
  331. self._content_encoding = value
  332. PyByteArray_Resize(self._raw_name, 0)
  333. PyByteArray_Resize(self._raw_value, 0)
  334. self._has_value = False
  335. self._raw_headers.append((raw_name, raw_value))
  336. cdef _on_header_field(self, char* at, size_t length):
  337. cdef Py_ssize_t size
  338. cdef char *buf
  339. if self._has_value:
  340. self._process_header()
  341. size = PyByteArray_Size(self._raw_name)
  342. PyByteArray_Resize(self._raw_name, size + length)
  343. buf = PyByteArray_AsString(self._raw_name)
  344. memcpy(buf + size, at, length)
  345. cdef _on_header_value(self, char* at, size_t length):
  346. cdef Py_ssize_t size
  347. cdef char *buf
  348. size = PyByteArray_Size(self._raw_value)
  349. PyByteArray_Resize(self._raw_value, size + length)
  350. buf = PyByteArray_AsString(self._raw_value)
  351. memcpy(buf + size, at, length)
  352. self._has_value = True
  353. cdef _on_headers_complete(self):
  354. self._process_header()
  355. method = http_method_str(self._cparser.method)
  356. should_close = not cparser.http_should_keep_alive(self._cparser)
  357. upgrade = self._cparser.upgrade
  358. chunked = self._cparser.flags & cparser.F_CHUNKED
  359. raw_headers = tuple(self._raw_headers)
  360. headers = CIMultiDictProxy(self._headers)
  361. if upgrade or self._cparser.method == 5: # cparser.CONNECT:
  362. self._upgraded = True
  363. # do not support old websocket spec
  364. if SEC_WEBSOCKET_KEY1 in headers:
  365. raise InvalidHeader(SEC_WEBSOCKET_KEY1)
  366. encoding = None
  367. enc = self._content_encoding
  368. if enc is not None:
  369. self._content_encoding = None
  370. enc = enc.lower()
  371. if enc in ('gzip', 'deflate', 'br'):
  372. encoding = enc
  373. if self._cparser.type == cparser.HTTP_REQUEST:
  374. msg = _new_request_message(
  375. method, self._path,
  376. self.http_version(), headers, raw_headers,
  377. should_close, encoding, upgrade, chunked, self._url)
  378. else:
  379. msg = _new_response_message(
  380. self.http_version(), self._cparser.status_code, self._reason,
  381. headers, raw_headers, should_close, encoding,
  382. upgrade, chunked)
  383. if (ULLONG_MAX > self._cparser.content_length > 0 or chunked or
  384. self._cparser.method == 5 or # CONNECT: 5
  385. (self._cparser.status_code >= 199 and
  386. self._cparser.content_length == ULLONG_MAX and
  387. self._read_until_eof)
  388. ):
  389. payload = StreamReader(
  390. self._protocol, timer=self._timer, loop=self._loop,
  391. limit=self._limit)
  392. else:
  393. payload = EMPTY_PAYLOAD
  394. self._payload = payload
  395. if encoding is not None and self._auto_decompress:
  396. self._payload = DeflateBuffer(payload, encoding)
  397. if not self._response_with_body:
  398. payload = EMPTY_PAYLOAD
  399. self._messages.append((msg, payload))
  400. cdef _on_message_complete(self):
  401. self._payload.feed_eof()
  402. self._payload = None
  403. cdef _on_chunk_header(self):
  404. self._payload.begin_http_chunk_receiving()
  405. cdef _on_chunk_complete(self):
  406. self._payload.end_http_chunk_receiving()
  407. cdef object _on_status_complete(self):
  408. pass
  409. cdef inline http_version(self):
  410. cdef cparser.http_parser* parser = self._cparser
  411. if parser.http_major == 1:
  412. if parser.http_minor == 0:
  413. return HttpVersion10
  414. elif parser.http_minor == 1:
  415. return HttpVersion11
  416. return HttpVersion(parser.http_major, parser.http_minor)
  417. ### Public API ###
  418. def feed_eof(self):
  419. cdef bytes desc
  420. if self._payload is not None:
  421. if self._cparser.flags & cparser.F_CHUNKED:
  422. raise TransferEncodingError(
  423. "Not enough data for satisfy transfer length header.")
  424. elif self._cparser.flags & cparser.F_CONTENTLENGTH:
  425. raise ContentLengthError(
  426. "Not enough data for satisfy content length header.")
  427. elif self._cparser.http_errno != cparser.HPE_OK:
  428. desc = cparser.http_errno_description(
  429. <cparser.http_errno> self._cparser.http_errno)
  430. raise PayloadEncodingError(desc.decode('latin-1'))
  431. else:
  432. self._payload.feed_eof()
  433. elif self._started:
  434. self._on_headers_complete()
  435. if self._messages:
  436. return self._messages[-1][0]
  437. def feed_data(self, data):
  438. cdef:
  439. size_t data_len
  440. size_t nb
  441. PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE)
  442. data_len = <size_t>self.py_buf.len
  443. nb = cparser.http_parser_execute(
  444. self._cparser,
  445. self._csettings,
  446. <char*>self.py_buf.buf,
  447. data_len)
  448. PyBuffer_Release(&self.py_buf)
  449. if (self._cparser.http_errno != cparser.HPE_OK):
  450. if self._payload_error == 0:
  451. if self._last_error is not None:
  452. ex = self._last_error
  453. self._last_error = None
  454. else:
  455. ex = parser_error_from_errno(
  456. <cparser.http_errno> self._cparser.http_errno)
  457. self._payload = None
  458. raise ex
  459. if self._messages:
  460. messages = self._messages
  461. self._messages = []
  462. else:
  463. messages = ()
  464. if self._upgraded:
  465. return messages, True, data[nb:]
  466. else:
  467. return messages, False, b''
  468. def set_upgraded(self, val):
  469. self._upgraded = val
  470. cdef class HttpRequestParser(HttpParser):
  471. def __init__(self, protocol, loop, int limit, timer=None,
  472. size_t max_line_size=8190, size_t max_headers=32768,
  473. size_t max_field_size=8190, payload_exception=None,
  474. bint response_with_body=True, bint read_until_eof=False,
  475. ):
  476. self._init(cparser.HTTP_REQUEST, protocol, loop, limit, timer,
  477. max_line_size, max_headers, max_field_size,
  478. payload_exception, response_with_body, read_until_eof)
  479. cdef object _on_status_complete(self):
  480. cdef Py_buffer py_buf
  481. if not self._buf:
  482. return
  483. self._path = self._buf.decode('utf-8', 'surrogateescape')
  484. if self._cparser.method == 5: # CONNECT
  485. self._url = URL(self._path)
  486. else:
  487. PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE)
  488. try:
  489. self._url = _parse_url(<char*>py_buf.buf,
  490. py_buf.len)
  491. finally:
  492. PyBuffer_Release(&py_buf)
  493. PyByteArray_Resize(self._buf, 0)
  494. cdef class HttpResponseParser(HttpParser):
  495. def __init__(self, protocol, loop, int limit, timer=None,
  496. size_t max_line_size=8190, size_t max_headers=32768,
  497. size_t max_field_size=8190, payload_exception=None,
  498. bint response_with_body=True, bint read_until_eof=False,
  499. bint auto_decompress=True
  500. ):
  501. self._init(cparser.HTTP_RESPONSE, protocol, loop, limit, timer,
  502. max_line_size, max_headers, max_field_size,
  503. payload_exception, response_with_body, read_until_eof,
  504. auto_decompress)
  505. cdef object _on_status_complete(self):
  506. if self._buf:
  507. self._reason = self._buf.decode('utf-8', 'surrogateescape')
  508. PyByteArray_Resize(self._buf, 0)
  509. else:
  510. self._reason = self._reason or ''
  511. cdef int cb_on_message_begin(cparser.http_parser* parser) except -1:
  512. cdef HttpParser pyparser = <HttpParser>parser.data
  513. pyparser._started = True
  514. pyparser._headers = CIMultiDict()
  515. pyparser._raw_headers = []
  516. PyByteArray_Resize(pyparser._buf, 0)
  517. pyparser._path = None
  518. pyparser._reason = None
  519. return 0
  520. cdef int cb_on_url(cparser.http_parser* parser,
  521. const char *at, size_t length) except -1:
  522. cdef HttpParser pyparser = <HttpParser>parser.data
  523. try:
  524. if length > pyparser._max_line_size:
  525. raise LineTooLong(
  526. 'Status line is too long', pyparser._max_line_size, length)
  527. extend(pyparser._buf, at, length)
  528. except BaseException as ex:
  529. pyparser._last_error = ex
  530. return -1
  531. else:
  532. return 0
  533. cdef int cb_on_status(cparser.http_parser* parser,
  534. const char *at, size_t length) except -1:
  535. cdef HttpParser pyparser = <HttpParser>parser.data
  536. cdef str reason
  537. try:
  538. if length > pyparser._max_line_size:
  539. raise LineTooLong(
  540. 'Status line is too long', pyparser._max_line_size, length)
  541. extend(pyparser._buf, at, length)
  542. except BaseException as ex:
  543. pyparser._last_error = ex
  544. return -1
  545. else:
  546. return 0
  547. cdef int cb_on_header_field(cparser.http_parser* parser,
  548. const char *at, size_t length) except -1:
  549. cdef HttpParser pyparser = <HttpParser>parser.data
  550. cdef Py_ssize_t size
  551. try:
  552. pyparser._on_status_complete()
  553. size = len(pyparser._raw_name) + length
  554. if size > pyparser._max_field_size:
  555. raise LineTooLong(
  556. 'Header name is too long', pyparser._max_field_size, size)
  557. pyparser._on_header_field(at, length)
  558. except BaseException as ex:
  559. pyparser._last_error = ex
  560. return -1
  561. else:
  562. return 0
  563. cdef int cb_on_header_value(cparser.http_parser* parser,
  564. const char *at, size_t length) except -1:
  565. cdef HttpParser pyparser = <HttpParser>parser.data
  566. cdef Py_ssize_t size
  567. try:
  568. size = len(pyparser._raw_value) + length
  569. if size > pyparser._max_field_size:
  570. raise LineTooLong(
  571. 'Header value is too long', pyparser._max_field_size, size)
  572. pyparser._on_header_value(at, length)
  573. except BaseException as ex:
  574. pyparser._last_error = ex
  575. return -1
  576. else:
  577. return 0
  578. cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1:
  579. cdef HttpParser pyparser = <HttpParser>parser.data
  580. try:
  581. pyparser._on_status_complete()
  582. pyparser._on_headers_complete()
  583. except BaseException as exc:
  584. pyparser._last_error = exc
  585. return -1
  586. else:
  587. if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT
  588. return 2
  589. else:
  590. return 0
  591. cdef int cb_on_body(cparser.http_parser* parser,
  592. const char *at, size_t length) except -1:
  593. cdef HttpParser pyparser = <HttpParser>parser.data
  594. cdef bytes body = at[:length]
  595. try:
  596. pyparser._payload.feed_data(body, length)
  597. except BaseException as exc:
  598. if pyparser._payload_exception is not None:
  599. pyparser._payload.set_exception(pyparser._payload_exception(str(exc)))
  600. else:
  601. pyparser._payload.set_exception(exc)
  602. pyparser._payload_error = 1
  603. return -1
  604. else:
  605. return 0
  606. cdef int cb_on_message_complete(cparser.http_parser* parser) except -1:
  607. cdef HttpParser pyparser = <HttpParser>parser.data
  608. try:
  609. pyparser._started = False
  610. pyparser._on_message_complete()
  611. except BaseException as exc:
  612. pyparser._last_error = exc
  613. return -1
  614. else:
  615. return 0
  616. cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1:
  617. cdef HttpParser pyparser = <HttpParser>parser.data
  618. try:
  619. pyparser._on_chunk_header()
  620. except BaseException as exc:
  621. pyparser._last_error = exc
  622. return -1
  623. else:
  624. return 0
  625. cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1:
  626. cdef HttpParser pyparser = <HttpParser>parser.data
  627. try:
  628. pyparser._on_chunk_complete()
  629. except BaseException as exc:
  630. pyparser._last_error = exc
  631. return -1
  632. else:
  633. return 0
  634. cdef parser_error_from_errno(cparser.http_errno errno):
  635. cdef bytes desc = cparser.http_errno_description(errno)
  636. if errno in (cparser.HPE_CB_message_begin,
  637. cparser.HPE_CB_url,
  638. cparser.HPE_CB_header_field,
  639. cparser.HPE_CB_header_value,
  640. cparser.HPE_CB_headers_complete,
  641. cparser.HPE_CB_body,
  642. cparser.HPE_CB_message_complete,
  643. cparser.HPE_CB_status,
  644. cparser.HPE_CB_chunk_header,
  645. cparser.HPE_CB_chunk_complete):
  646. cls = BadHttpMessage
  647. elif errno == cparser.HPE_INVALID_STATUS:
  648. cls = BadStatusLine
  649. elif errno == cparser.HPE_INVALID_METHOD:
  650. cls = BadStatusLine
  651. elif errno == cparser.HPE_INVALID_URL:
  652. cls = InvalidURLError
  653. else:
  654. cls = BadHttpMessage
  655. return cls(desc.decode('latin-1'))
  656. def parse_url(url):
  657. cdef:
  658. Py_buffer py_buf
  659. char* buf_data
  660. PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE)
  661. try:
  662. buf_data = <char*>py_buf.buf
  663. return _parse_url(buf_data, py_buf.len)
  664. finally:
  665. PyBuffer_Release(&py_buf)
  666. cdef _parse_url(char* buf_data, size_t length):
  667. cdef:
  668. cparser.http_parser_url* parsed
  669. int res
  670. str schema = None
  671. str host = None
  672. object port = None
  673. str path = None
  674. str query = None
  675. str fragment = None
  676. str user = None
  677. str password = None
  678. str userinfo = None
  679. object result = None
  680. int off
  681. int ln
  682. parsed = <cparser.http_parser_url*> \
  683. PyMem_Malloc(sizeof(cparser.http_parser_url))
  684. if parsed is NULL:
  685. raise MemoryError()
  686. cparser.http_parser_url_init(parsed)
  687. try:
  688. res = cparser.http_parser_parse_url(buf_data, length, 0, parsed)
  689. if res == 0:
  690. if parsed.field_set & (1 << cparser.UF_SCHEMA):
  691. off = parsed.field_data[<int>cparser.UF_SCHEMA].off
  692. ln = parsed.field_data[<int>cparser.UF_SCHEMA].len
  693. schema = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  694. else:
  695. schema = ''
  696. if parsed.field_set & (1 << cparser.UF_HOST):
  697. off = parsed.field_data[<int>cparser.UF_HOST].off
  698. ln = parsed.field_data[<int>cparser.UF_HOST].len
  699. host = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  700. else:
  701. host = ''
  702. if parsed.field_set & (1 << cparser.UF_PORT):
  703. port = parsed.port
  704. if parsed.field_set & (1 << cparser.UF_PATH):
  705. off = parsed.field_data[<int>cparser.UF_PATH].off
  706. ln = parsed.field_data[<int>cparser.UF_PATH].len
  707. path = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  708. else:
  709. path = ''
  710. if parsed.field_set & (1 << cparser.UF_QUERY):
  711. off = parsed.field_data[<int>cparser.UF_QUERY].off
  712. ln = parsed.field_data[<int>cparser.UF_QUERY].len
  713. query = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  714. else:
  715. query = ''
  716. if parsed.field_set & (1 << cparser.UF_FRAGMENT):
  717. off = parsed.field_data[<int>cparser.UF_FRAGMENT].off
  718. ln = parsed.field_data[<int>cparser.UF_FRAGMENT].len
  719. fragment = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  720. else:
  721. fragment = ''
  722. if parsed.field_set & (1 << cparser.UF_USERINFO):
  723. off = parsed.field_data[<int>cparser.UF_USERINFO].off
  724. ln = parsed.field_data[<int>cparser.UF_USERINFO].len
  725. userinfo = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  726. user, sep, password = userinfo.partition(':')
  727. return URL_build(scheme=schema,
  728. user=user, password=password, host=host, port=port,
  729. path=path, query_string=query, fragment=fragment, encoded=True)
  730. else:
  731. raise InvalidURLError("invalid url {!r}".format(buf_data))
  732. finally:
  733. PyMem_Free(parsed)