profile.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. #! /usr/bin/env python3
  2. #
  3. # Class for profiling python code. rev 1.0 6/2/94
  4. #
  5. # Written by James Roskind
  6. # Based on prior profile module by Sjoerd Mullender...
  7. # which was hacked somewhat by: Guido van Rossum
  8. """Class for profiling Python code."""
  9. # Copyright Disney Enterprises, Inc. All Rights Reserved.
  10. # Licensed to PSF under a Contributor Agreement
  11. #
  12. # Licensed under the Apache License, Version 2.0 (the "License");
  13. # you may not use this file except in compliance with the License.
  14. # You may obtain a copy of the License at
  15. #
  16. # http://www.apache.org/licenses/LICENSE-2.0
  17. #
  18. # Unless required by applicable law or agreed to in writing, software
  19. # distributed under the License is distributed on an "AS IS" BASIS,
  20. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
  21. # either express or implied. See the License for the specific language
  22. # governing permissions and limitations under the License.
  23. import importlib.machinery
  24. import io
  25. import sys
  26. import time
  27. import marshal
  28. __all__ = ["run", "runctx", "Profile"]
  29. # Sample timer for use with
  30. #i_count = 0
  31. #def integer_timer():
  32. # global i_count
  33. # i_count = i_count + 1
  34. # return i_count
  35. #itimes = integer_timer # replace with C coded timer returning integers
  36. class _Utils:
  37. """Support class for utility functions which are shared by
  38. profile.py and cProfile.py modules.
  39. Not supposed to be used directly.
  40. """
  41. def __init__(self, profiler):
  42. self.profiler = profiler
  43. def run(self, statement, filename, sort):
  44. prof = self.profiler()
  45. try:
  46. prof.run(statement)
  47. except SystemExit:
  48. pass
  49. finally:
  50. self._show(prof, filename, sort)
  51. def runctx(self, statement, globals, locals, filename, sort):
  52. prof = self.profiler()
  53. try:
  54. prof.runctx(statement, globals, locals)
  55. except SystemExit:
  56. pass
  57. finally:
  58. self._show(prof, filename, sort)
  59. def _show(self, prof, filename, sort):
  60. if filename is not None:
  61. prof.dump_stats(filename)
  62. else:
  63. prof.print_stats(sort)
  64. #**************************************************************************
  65. # The following are the static member functions for the profiler class
  66. # Note that an instance of Profile() is *not* needed to call them.
  67. #**************************************************************************
  68. def run(statement, filename=None, sort=-1):
  69. """Run statement under profiler optionally saving results in filename
  70. This function takes a single argument that can be passed to the
  71. "exec" statement, and an optional file name. In all cases this
  72. routine attempts to "exec" its first argument and gather profiling
  73. statistics from the execution. If no file name is present, then this
  74. function automatically prints a simple profiling report, sorted by the
  75. standard name string (file/line/function-name) that is presented in
  76. each line.
  77. """
  78. return _Utils(Profile).run(statement, filename, sort)
  79. def runctx(statement, globals, locals, filename=None, sort=-1):
  80. """Run statement under profiler, supplying your own globals and locals,
  81. optionally saving results in filename.
  82. statement and filename have the same semantics as profile.run
  83. """
  84. return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
  85. class Profile:
  86. """Profiler class.
  87. self.cur is always a tuple. Each such tuple corresponds to a stack
  88. frame that is currently active (self.cur[-2]). The following are the
  89. definitions of its members. We use this external "parallel stack" to
  90. avoid contaminating the program that we are profiling. (old profiler
  91. used to write into the frames local dictionary!!) Derived classes
  92. can change the definition of some entries, as long as they leave
  93. [-2:] intact (frame and previous tuple). In case an internal error is
  94. detected, the -3 element is used as the function name.
  95. [ 0] = Time that needs to be charged to the parent frame's function.
  96. It is used so that a function call will not have to access the
  97. timing data for the parent frame.
  98. [ 1] = Total time spent in this frame's function, excluding time in
  99. subfunctions (this latter is tallied in cur[2]).
  100. [ 2] = Total time spent in subfunctions, excluding time executing the
  101. frame's function (this latter is tallied in cur[1]).
  102. [-3] = Name of the function that corresponds to this frame.
  103. [-2] = Actual frame that we correspond to (used to sync exception handling).
  104. [-1] = Our parent 6-tuple (corresponds to frame.f_back).
  105. Timing data for each function is stored as a 5-tuple in the dictionary
  106. self.timings[]. The index is always the name stored in self.cur[-3].
  107. The following are the definitions of the members:
  108. [0] = The number of times this function was called, not counting direct
  109. or indirect recursion,
  110. [1] = Number of times this function appears on the stack, minus one
  111. [2] = Total time spent internal to this function
  112. [3] = Cumulative time that this function was present on the stack. In
  113. non-recursive functions, this is the total execution time from start
  114. to finish of each invocation of a function, including time spent in
  115. all subfunctions.
  116. [4] = A dictionary indicating for each function name, the number of times
  117. it was called by us.
  118. """
  119. bias = 0 # calibration constant
  120. def __init__(self, timer=None, bias=None):
  121. self.timings = {}
  122. self.cur = None
  123. self.cmd = ""
  124. self.c_func_name = ""
  125. if bias is None:
  126. bias = self.bias
  127. self.bias = bias # Materialize in local dict for lookup speed.
  128. if not timer:
  129. self.timer = self.get_time = time.process_time
  130. self.dispatcher = self.trace_dispatch_i
  131. else:
  132. self.timer = timer
  133. t = self.timer() # test out timer function
  134. try:
  135. length = len(t)
  136. except TypeError:
  137. self.get_time = timer
  138. self.dispatcher = self.trace_dispatch_i
  139. else:
  140. if length == 2:
  141. self.dispatcher = self.trace_dispatch
  142. else:
  143. self.dispatcher = self.trace_dispatch_l
  144. # This get_time() implementation needs to be defined
  145. # here to capture the passed-in timer in the parameter
  146. # list (for performance). Note that we can't assume
  147. # the timer() result contains two values in all
  148. # cases.
  149. def get_time_timer(timer=timer, sum=sum):
  150. return sum(timer())
  151. self.get_time = get_time_timer
  152. self.t = self.get_time()
  153. self.simulate_call('profiler')
  154. # Heavily optimized dispatch routine for time.process_time() timer
  155. def trace_dispatch(self, frame, event, arg):
  156. timer = self.timer
  157. t = timer()
  158. t = t[0] + t[1] - self.t - self.bias
  159. if event == "c_call":
  160. self.c_func_name = arg.__name__
  161. if self.dispatch[event](self, frame,t):
  162. t = timer()
  163. self.t = t[0] + t[1]
  164. else:
  165. r = timer()
  166. self.t = r[0] + r[1] - t # put back unrecorded delta
  167. # Dispatch routine for best timer program (return = scalar, fastest if
  168. # an integer but float works too -- and time.process_time() relies on that).
  169. def trace_dispatch_i(self, frame, event, arg):
  170. timer = self.timer
  171. t = timer() - self.t - self.bias
  172. if event == "c_call":
  173. self.c_func_name = arg.__name__
  174. if self.dispatch[event](self, frame, t):
  175. self.t = timer()
  176. else:
  177. self.t = timer() - t # put back unrecorded delta
  178. # Dispatch routine for macintosh (timer returns time in ticks of
  179. # 1/60th second)
  180. def trace_dispatch_mac(self, frame, event, arg):
  181. timer = self.timer
  182. t = timer()/60.0 - self.t - self.bias
  183. if event == "c_call":
  184. self.c_func_name = arg.__name__
  185. if self.dispatch[event](self, frame, t):
  186. self.t = timer()/60.0
  187. else:
  188. self.t = timer()/60.0 - t # put back unrecorded delta
  189. # SLOW generic dispatch routine for timer returning lists of numbers
  190. def trace_dispatch_l(self, frame, event, arg):
  191. get_time = self.get_time
  192. t = get_time() - self.t - self.bias
  193. if event == "c_call":
  194. self.c_func_name = arg.__name__
  195. if self.dispatch[event](self, frame, t):
  196. self.t = get_time()
  197. else:
  198. self.t = get_time() - t # put back unrecorded delta
  199. # In the event handlers, the first 3 elements of self.cur are unpacked
  200. # into vrbls w/ 3-letter names. The last two characters are meant to be
  201. # mnemonic:
  202. # _pt self.cur[0] "parent time" time to be charged to parent frame
  203. # _it self.cur[1] "internal time" time spent directly in the function
  204. # _et self.cur[2] "external time" time spent in subfunctions
  205. def trace_dispatch_exception(self, frame, t):
  206. rpt, rit, ret, rfn, rframe, rcur = self.cur
  207. if (rframe is not frame) and rcur:
  208. return self.trace_dispatch_return(rframe, t)
  209. self.cur = rpt, rit+t, ret, rfn, rframe, rcur
  210. return 1
  211. def trace_dispatch_call(self, frame, t):
  212. if self.cur and frame.f_back is not self.cur[-2]:
  213. rpt, rit, ret, rfn, rframe, rcur = self.cur
  214. if not isinstance(rframe, Profile.fake_frame):
  215. assert rframe.f_back is frame.f_back, ("Bad call", rfn,
  216. rframe, rframe.f_back,
  217. frame, frame.f_back)
  218. self.trace_dispatch_return(rframe, 0)
  219. assert (self.cur is None or \
  220. frame.f_back is self.cur[-2]), ("Bad call",
  221. self.cur[-3])
  222. fcode = frame.f_code
  223. fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
  224. self.cur = (t, 0, 0, fn, frame, self.cur)
  225. timings = self.timings
  226. if fn in timings:
  227. cc, ns, tt, ct, callers = timings[fn]
  228. timings[fn] = cc, ns + 1, tt, ct, callers
  229. else:
  230. timings[fn] = 0, 0, 0, 0, {}
  231. return 1
  232. def trace_dispatch_c_call (self, frame, t):
  233. fn = ("", 0, self.c_func_name)
  234. self.cur = (t, 0, 0, fn, frame, self.cur)
  235. timings = self.timings
  236. if fn in timings:
  237. cc, ns, tt, ct, callers = timings[fn]
  238. timings[fn] = cc, ns+1, tt, ct, callers
  239. else:
  240. timings[fn] = 0, 0, 0, 0, {}
  241. return 1
  242. def trace_dispatch_return(self, frame, t):
  243. if frame is not self.cur[-2]:
  244. assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
  245. self.trace_dispatch_return(self.cur[-2], 0)
  246. # Prefix "r" means part of the Returning or exiting frame.
  247. # Prefix "p" means part of the Previous or Parent or older frame.
  248. rpt, rit, ret, rfn, frame, rcur = self.cur
  249. rit = rit + t
  250. frame_total = rit + ret
  251. ppt, pit, pet, pfn, pframe, pcur = rcur
  252. self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
  253. timings = self.timings
  254. cc, ns, tt, ct, callers = timings[rfn]
  255. if not ns:
  256. # This is the only occurrence of the function on the stack.
  257. # Else this is a (directly or indirectly) recursive call, and
  258. # its cumulative time will get updated when the topmost call to
  259. # it returns.
  260. ct = ct + frame_total
  261. cc = cc + 1
  262. if pfn in callers:
  263. callers[pfn] = callers[pfn] + 1 # hack: gather more
  264. # stats such as the amount of time added to ct courtesy
  265. # of this specific call, and the contribution to cc
  266. # courtesy of this call.
  267. else:
  268. callers[pfn] = 1
  269. timings[rfn] = cc, ns - 1, tt + rit, ct, callers
  270. return 1
  271. dispatch = {
  272. "call": trace_dispatch_call,
  273. "exception": trace_dispatch_exception,
  274. "return": trace_dispatch_return,
  275. "c_call": trace_dispatch_c_call,
  276. "c_exception": trace_dispatch_return, # the C function returned
  277. "c_return": trace_dispatch_return,
  278. }
  279. # The next few functions play with self.cmd. By carefully preloading
  280. # our parallel stack, we can force the profiled result to include
  281. # an arbitrary string as the name of the calling function.
  282. # We use self.cmd as that string, and the resulting stats look
  283. # very nice :-).
  284. def set_cmd(self, cmd):
  285. if self.cur[-1]: return # already set
  286. self.cmd = cmd
  287. self.simulate_call(cmd)
  288. class fake_code:
  289. def __init__(self, filename, line, name):
  290. self.co_filename = filename
  291. self.co_line = line
  292. self.co_name = name
  293. self.co_firstlineno = 0
  294. def __repr__(self):
  295. return repr((self.co_filename, self.co_line, self.co_name))
  296. class fake_frame:
  297. def __init__(self, code, prior):
  298. self.f_code = code
  299. self.f_back = prior
  300. def simulate_call(self, name):
  301. code = self.fake_code('profile', 0, name)
  302. if self.cur:
  303. pframe = self.cur[-2]
  304. else:
  305. pframe = None
  306. frame = self.fake_frame(code, pframe)
  307. self.dispatch['call'](self, frame, 0)
  308. # collect stats from pending stack, including getting final
  309. # timings for self.cmd frame.
  310. def simulate_cmd_complete(self):
  311. get_time = self.get_time
  312. t = get_time() - self.t
  313. while self.cur[-1]:
  314. # We *can* cause assertion errors here if
  315. # dispatch_trace_return checks for a frame match!
  316. self.dispatch['return'](self, self.cur[-2], t)
  317. t = 0
  318. self.t = get_time() - t
  319. def print_stats(self, sort=-1):
  320. import pstats
  321. pstats.Stats(self).strip_dirs().sort_stats(sort). \
  322. print_stats()
  323. def dump_stats(self, file):
  324. with open(file, 'wb') as f:
  325. self.create_stats()
  326. marshal.dump(self.stats, f)
  327. def create_stats(self):
  328. self.simulate_cmd_complete()
  329. self.snapshot_stats()
  330. def snapshot_stats(self):
  331. self.stats = {}
  332. for func, (cc, ns, tt, ct, callers) in self.timings.items():
  333. callers = callers.copy()
  334. nc = 0
  335. for callcnt in callers.values():
  336. nc += callcnt
  337. self.stats[func] = cc, nc, tt, ct, callers
  338. # The following two methods can be called by clients to use
  339. # a profiler to profile a statement, given as a string.
  340. def run(self, cmd):
  341. import __main__
  342. dict = __main__.__dict__
  343. return self.runctx(cmd, dict, dict)
  344. def runctx(self, cmd, globals, locals):
  345. self.set_cmd(cmd)
  346. sys.setprofile(self.dispatcher)
  347. try:
  348. exec(cmd, globals, locals)
  349. finally:
  350. sys.setprofile(None)
  351. return self
  352. # This method is more useful to profile a single function call.
  353. def runcall(self, func, /, *args, **kw):
  354. self.set_cmd(repr(func))
  355. sys.setprofile(self.dispatcher)
  356. try:
  357. return func(*args, **kw)
  358. finally:
  359. sys.setprofile(None)
  360. #******************************************************************
  361. # The following calculates the overhead for using a profiler. The
  362. # problem is that it takes a fair amount of time for the profiler
  363. # to stop the stopwatch (from the time it receives an event).
  364. # Similarly, there is a delay from the time that the profiler
  365. # re-starts the stopwatch before the user's code really gets to
  366. # continue. The following code tries to measure the difference on
  367. # a per-event basis.
  368. #
  369. # Note that this difference is only significant if there are a lot of
  370. # events, and relatively little user code per event. For example,
  371. # code with small functions will typically benefit from having the
  372. # profiler calibrated for the current platform. This *could* be
  373. # done on the fly during init() time, but it is not worth the
  374. # effort. Also note that if too large a value specified, then
  375. # execution time on some functions will actually appear as a
  376. # negative number. It is *normal* for some functions (with very
  377. # low call counts) to have such negative stats, even if the
  378. # calibration figure is "correct."
  379. #
  380. # One alternative to profile-time calibration adjustments (i.e.,
  381. # adding in the magic little delta during each event) is to track
  382. # more carefully the number of events (and cumulatively, the number
  383. # of events during sub functions) that are seen. If this were
  384. # done, then the arithmetic could be done after the fact (i.e., at
  385. # display time). Currently, we track only call/return events.
  386. # These values can be deduced by examining the callees and callers
  387. # vectors for each functions. Hence we *can* almost correct the
  388. # internal time figure at print time (note that we currently don't
  389. # track exception event processing counts). Unfortunately, there
  390. # is currently no similar information for cumulative sub-function
  391. # time. It would not be hard to "get all this info" at profiler
  392. # time. Specifically, we would have to extend the tuples to keep
  393. # counts of this in each frame, and then extend the defs of timing
  394. # tuples to include the significant two figures. I'm a bit fearful
  395. # that this additional feature will slow the heavily optimized
  396. # event/time ratio (i.e., the profiler would run slower, fur a very
  397. # low "value added" feature.)
  398. #**************************************************************
  399. def calibrate(self, m, verbose=0):
  400. if self.__class__ is not Profile:
  401. raise TypeError("Subclasses must override .calibrate().")
  402. saved_bias = self.bias
  403. self.bias = 0
  404. try:
  405. return self._calibrate_inner(m, verbose)
  406. finally:
  407. self.bias = saved_bias
  408. def _calibrate_inner(self, m, verbose):
  409. get_time = self.get_time
  410. # Set up a test case to be run with and without profiling. Include
  411. # lots of calls, because we're trying to quantify stopwatch overhead.
  412. # Do not raise any exceptions, though, because we want to know
  413. # exactly how many profile events are generated (one call event, +
  414. # one return event, per Python-level call).
  415. def f1(n):
  416. for i in range(n):
  417. x = 1
  418. def f(m, f1=f1):
  419. for i in range(m):
  420. f1(100)
  421. f(m) # warm up the cache
  422. # elapsed_noprofile <- time f(m) takes without profiling.
  423. t0 = get_time()
  424. f(m)
  425. t1 = get_time()
  426. elapsed_noprofile = t1 - t0
  427. if verbose:
  428. print("elapsed time without profiling =", elapsed_noprofile)
  429. # elapsed_profile <- time f(m) takes with profiling. The difference
  430. # is profiling overhead, only some of which the profiler subtracts
  431. # out on its own.
  432. p = Profile()
  433. t0 = get_time()
  434. p.runctx('f(m)', globals(), locals())
  435. t1 = get_time()
  436. elapsed_profile = t1 - t0
  437. if verbose:
  438. print("elapsed time with profiling =", elapsed_profile)
  439. # reported_time <- "CPU seconds" the profiler charged to f and f1.
  440. total_calls = 0.0
  441. reported_time = 0.0
  442. for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
  443. p.timings.items():
  444. if funcname in ("f", "f1"):
  445. total_calls += cc
  446. reported_time += tt
  447. if verbose:
  448. print("'CPU seconds' profiler reported =", reported_time)
  449. print("total # calls =", total_calls)
  450. if total_calls != m + 1:
  451. raise ValueError("internal error: total calls = %d" % total_calls)
  452. # reported_time - elapsed_noprofile = overhead the profiler wasn't
  453. # able to measure. Divide by twice the number of calls (since there
  454. # are two profiler events per call in this test) to get the hidden
  455. # overhead per event.
  456. mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
  457. if verbose:
  458. print("mean stopwatch overhead per profile event =", mean)
  459. return mean
  460. #****************************************************************************
  461. def main():
  462. import os
  463. from optparse import OptionParser
  464. usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
  465. parser = OptionParser(usage=usage)
  466. parser.allow_interspersed_args = False
  467. parser.add_option('-o', '--outfile', dest="outfile",
  468. help="Save stats to <outfile>", default=None)
  469. parser.add_option('-m', dest="module", action="store_true",
  470. help="Profile a library module.", default=False)
  471. parser.add_option('-s', '--sort', dest="sort",
  472. help="Sort order when printing to stdout, based on pstats.Stats class",
  473. default=-1)
  474. if not sys.argv[1:]:
  475. parser.print_usage()
  476. sys.exit(2)
  477. (options, args) = parser.parse_args()
  478. sys.argv[:] = args
  479. # The script that we're profiling may chdir, so capture the absolute path
  480. # to the output file at startup.
  481. if options.outfile is not None:
  482. options.outfile = os.path.abspath(options.outfile)
  483. if len(args) > 0:
  484. if options.module:
  485. import runpy
  486. code = "run_module(modname, run_name='__main__')"
  487. globs = {
  488. 'run_module': runpy.run_module,
  489. 'modname': args[0]
  490. }
  491. else:
  492. progname = args[0]
  493. sys.path.insert(0, os.path.dirname(progname))
  494. with io.open_code(progname) as fp:
  495. code = compile(fp.read(), progname, 'exec')
  496. spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
  497. origin=progname)
  498. globs = {
  499. '__spec__': spec,
  500. '__file__': spec.origin,
  501. '__name__': spec.name,
  502. '__package__': None,
  503. '__cached__': None,
  504. }
  505. try:
  506. runctx(code, globs, None, options.outfile, options.sort)
  507. except BrokenPipeError as exc:
  508. # Prevent "Exception ignored" during interpreter shutdown.
  509. sys.stdout = None
  510. sys.exit(exc.errno)
  511. else:
  512. parser.print_usage()
  513. return parser
  514. # When invoked as main program, invoke the profiler on a script
  515. if __name__ == '__main__':
  516. main()