web_log.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. import datetime
  2. import functools
  3. import logging
  4. import os
  5. import re
  6. from collections import namedtuple
  7. from typing import Any, Callable, Dict, Iterable, List, Tuple # noqa
  8. from .abc import AbstractAccessLogger
  9. from .web_request import BaseRequest
  10. from .web_response import StreamResponse
  11. KeyMethod = namedtuple("KeyMethod", "key method")
  12. class AccessLogger(AbstractAccessLogger):
  13. """Helper object to log access.
  14. Usage:
  15. log = logging.getLogger("spam")
  16. log_format = "%a %{User-Agent}i"
  17. access_logger = AccessLogger(log, log_format)
  18. access_logger.log(request, response, time)
  19. Format:
  20. %% The percent sign
  21. %a Remote IP-address (IP-address of proxy if using reverse proxy)
  22. %t Time when the request was started to process
  23. %P The process ID of the child that serviced the request
  24. %r First line of request
  25. %s Response status code
  26. %b Size of response in bytes, including HTTP headers
  27. %T Time taken to serve the request, in seconds
  28. %Tf Time taken to serve the request, in seconds with floating fraction
  29. in .06f format
  30. %D Time taken to serve the request, in microseconds
  31. %{FOO}i request.headers['FOO']
  32. %{FOO}o response.headers['FOO']
  33. %{FOO}e os.environ['FOO']
  34. """
  35. LOG_FORMAT_MAP = {
  36. "a": "remote_address",
  37. "t": "request_start_time",
  38. "P": "process_id",
  39. "r": "first_request_line",
  40. "s": "response_status",
  41. "b": "response_size",
  42. "T": "request_time",
  43. "Tf": "request_time_frac",
  44. "D": "request_time_micro",
  45. "i": "request_header",
  46. "o": "response_header",
  47. }
  48. LOG_FORMAT = '%a %t "%r" %s %b "%{Referer}i" "%{User-Agent}i"'
  49. FORMAT_RE = re.compile(r"%(\{([A-Za-z0-9\-_]+)\}([ioe])|[atPrsbOD]|Tf?)")
  50. CLEANUP_RE = re.compile(r"(%[^s])")
  51. _FORMAT_CACHE = {} # type: Dict[str, Tuple[str, List[KeyMethod]]]
  52. def __init__(self, logger: logging.Logger, log_format: str = LOG_FORMAT) -> None:
  53. """Initialise the logger.
  54. logger is a logger object to be used for logging.
  55. log_format is a string with apache compatible log format description.
  56. """
  57. super().__init__(logger, log_format=log_format)
  58. _compiled_format = AccessLogger._FORMAT_CACHE.get(log_format)
  59. if not _compiled_format:
  60. _compiled_format = self.compile_format(log_format)
  61. AccessLogger._FORMAT_CACHE[log_format] = _compiled_format
  62. self._log_format, self._methods = _compiled_format
  63. def compile_format(self, log_format: str) -> Tuple[str, List[KeyMethod]]:
  64. """Translate log_format into form usable by modulo formatting
  65. All known atoms will be replaced with %s
  66. Also methods for formatting of those atoms will be added to
  67. _methods in appropriate order
  68. For example we have log_format = "%a %t"
  69. This format will be translated to "%s %s"
  70. Also contents of _methods will be
  71. [self._format_a, self._format_t]
  72. These method will be called and results will be passed
  73. to translated string format.
  74. Each _format_* method receive 'args' which is list of arguments
  75. given to self.log
  76. Exceptions are _format_e, _format_i and _format_o methods which
  77. also receive key name (by functools.partial)
  78. """
  79. # list of (key, method) tuples, we don't use an OrderedDict as users
  80. # can repeat the same key more than once
  81. methods = list()
  82. for atom in self.FORMAT_RE.findall(log_format):
  83. if atom[1] == "":
  84. format_key1 = self.LOG_FORMAT_MAP[atom[0]]
  85. m = getattr(AccessLogger, "_format_%s" % atom[0])
  86. key_method = KeyMethod(format_key1, m)
  87. else:
  88. format_key2 = (self.LOG_FORMAT_MAP[atom[2]], atom[1])
  89. m = getattr(AccessLogger, "_format_%s" % atom[2])
  90. key_method = KeyMethod(format_key2, functools.partial(m, atom[1]))
  91. methods.append(key_method)
  92. log_format = self.FORMAT_RE.sub(r"%s", log_format)
  93. log_format = self.CLEANUP_RE.sub(r"%\1", log_format)
  94. return log_format, methods
  95. @staticmethod
  96. def _format_i(
  97. key: str, request: BaseRequest, response: StreamResponse, time: float
  98. ) -> str:
  99. if request is None:
  100. return "(no headers)"
  101. # suboptimal, make istr(key) once
  102. return request.headers.get(key, "-")
  103. @staticmethod
  104. def _format_o(
  105. key: str, request: BaseRequest, response: StreamResponse, time: float
  106. ) -> str:
  107. # suboptimal, make istr(key) once
  108. return response.headers.get(key, "-")
  109. @staticmethod
  110. def _format_a(request: BaseRequest, response: StreamResponse, time: float) -> str:
  111. if request is None:
  112. return "-"
  113. ip = request.remote
  114. return ip if ip is not None else "-"
  115. @staticmethod
  116. def _format_t(request: BaseRequest, response: StreamResponse, time: float) -> str:
  117. now = datetime.datetime.utcnow()
  118. start_time = now - datetime.timedelta(seconds=time)
  119. return start_time.strftime("[%d/%b/%Y:%H:%M:%S +0000]")
  120. @staticmethod
  121. def _format_P(request: BaseRequest, response: StreamResponse, time: float) -> str:
  122. return "<%s>" % os.getpid()
  123. @staticmethod
  124. def _format_r(request: BaseRequest, response: StreamResponse, time: float) -> str:
  125. if request is None:
  126. return "-"
  127. return "{} {} HTTP/{}.{}".format(
  128. request.method,
  129. request.path_qs,
  130. request.version.major,
  131. request.version.minor,
  132. )
  133. @staticmethod
  134. def _format_s(request: BaseRequest, response: StreamResponse, time: float) -> int:
  135. return response.status
  136. @staticmethod
  137. def _format_b(request: BaseRequest, response: StreamResponse, time: float) -> int:
  138. return response.body_length
  139. @staticmethod
  140. def _format_T(request: BaseRequest, response: StreamResponse, time: float) -> str:
  141. return str(round(time))
  142. @staticmethod
  143. def _format_Tf(request: BaseRequest, response: StreamResponse, time: float) -> str:
  144. return "%06f" % time
  145. @staticmethod
  146. def _format_D(request: BaseRequest, response: StreamResponse, time: float) -> str:
  147. return str(round(time * 1000000))
  148. def _format_line(
  149. self, request: BaseRequest, response: StreamResponse, time: float
  150. ) -> Iterable[Tuple[str, Callable[[BaseRequest, StreamResponse, float], str]]]:
  151. return [(key, method(request, response, time)) for key, method in self._methods]
  152. def log(self, request: BaseRequest, response: StreamResponse, time: float) -> None:
  153. try:
  154. fmt_info = self._format_line(request, response, time)
  155. values = list()
  156. extra = dict()
  157. for key, value in fmt_info:
  158. values.append(value)
  159. if key.__class__ is str:
  160. extra[key] = value
  161. else:
  162. k1, k2 = key # type: ignore
  163. dct = extra.get(k1, {}) # type: ignore
  164. dct[k2] = value # type: ignore
  165. extra[k1] = dct # type: ignore
  166. self.logger.info(self._log_format % tuple(values), extra=extra)
  167. except Exception:
  168. self.logger.exception("Error in logging")