cmdline.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. """
  2. pygments.cmdline
  3. ~~~~~~~~~~~~~~~~
  4. Command line interface.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import os
  9. import sys
  10. import shutil
  11. import argparse
  12. from textwrap import dedent
  13. from pygments import __version__, highlight
  14. from pygments.util import ClassNotFound, OptionError, docstring_headline, \
  15. guess_decode, guess_decode_from_terminal, terminal_encoding, \
  16. UnclosingTextIOWrapper
  17. from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
  18. load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
  19. from pygments.lexers.special import TextLexer
  20. from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
  21. from pygments.formatters import get_all_formatters, get_formatter_by_name, \
  22. load_formatter_from_file, get_formatter_for_filename, find_formatter_class
  23. from pygments.formatters.terminal import TerminalFormatter
  24. from pygments.formatters.terminal256 import Terminal256Formatter
  25. from pygments.filters import get_all_filters, find_filter_class
  26. from pygments.styles import get_all_styles, get_style_by_name
  27. def _parse_options(o_strs):
  28. opts = {}
  29. if not o_strs:
  30. return opts
  31. for o_str in o_strs:
  32. if not o_str.strip():
  33. continue
  34. o_args = o_str.split(',')
  35. for o_arg in o_args:
  36. o_arg = o_arg.strip()
  37. try:
  38. o_key, o_val = o_arg.split('=', 1)
  39. o_key = o_key.strip()
  40. o_val = o_val.strip()
  41. except ValueError:
  42. opts[o_arg] = True
  43. else:
  44. opts[o_key] = o_val
  45. return opts
  46. def _parse_filters(f_strs):
  47. filters = []
  48. if not f_strs:
  49. return filters
  50. for f_str in f_strs:
  51. if ':' in f_str:
  52. fname, fopts = f_str.split(':', 1)
  53. filters.append((fname, _parse_options([fopts])))
  54. else:
  55. filters.append((f_str, {}))
  56. return filters
  57. def _print_help(what, name):
  58. try:
  59. if what == 'lexer':
  60. cls = get_lexer_by_name(name)
  61. print("Help on the %s lexer:" % cls.name)
  62. print(dedent(cls.__doc__))
  63. elif what == 'formatter':
  64. cls = find_formatter_class(name)
  65. print("Help on the %s formatter:" % cls.name)
  66. print(dedent(cls.__doc__))
  67. elif what == 'filter':
  68. cls = find_filter_class(name)
  69. print("Help on the %s filter:" % name)
  70. print(dedent(cls.__doc__))
  71. return 0
  72. except (AttributeError, ValueError):
  73. print("%s not found!" % what, file=sys.stderr)
  74. return 1
  75. def _print_list(what):
  76. if what == 'lexer':
  77. print()
  78. print("Lexers:")
  79. print("~~~~~~~")
  80. info = []
  81. for fullname, names, exts, _ in get_all_lexers():
  82. tup = (', '.join(names)+':', fullname,
  83. exts and '(filenames ' + ', '.join(exts) + ')' or '')
  84. info.append(tup)
  85. info.sort()
  86. for i in info:
  87. print(('* %s\n %s %s') % i)
  88. elif what == 'formatter':
  89. print()
  90. print("Formatters:")
  91. print("~~~~~~~~~~~")
  92. info = []
  93. for cls in get_all_formatters():
  94. doc = docstring_headline(cls)
  95. tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
  96. '(filenames ' + ', '.join(cls.filenames) + ')' or '')
  97. info.append(tup)
  98. info.sort()
  99. for i in info:
  100. print(('* %s\n %s %s') % i)
  101. elif what == 'filter':
  102. print()
  103. print("Filters:")
  104. print("~~~~~~~~")
  105. for name in get_all_filters():
  106. cls = find_filter_class(name)
  107. print("* " + name + ':')
  108. print(" %s" % docstring_headline(cls))
  109. elif what == 'style':
  110. print()
  111. print("Styles:")
  112. print("~~~~~~~")
  113. for name in get_all_styles():
  114. cls = get_style_by_name(name)
  115. print("* " + name + ':')
  116. print(" %s" % docstring_headline(cls))
  117. def main_inner(parser, argns):
  118. if argns.help:
  119. parser.print_help()
  120. return 0
  121. if argns.V:
  122. print('Pygments version %s, (c) 2006-2021 by Georg Brandl, Matthäus '
  123. 'Chajdas and contributors.' % __version__)
  124. return 0
  125. def is_only_option(opt):
  126. return not any(v for (k, v) in vars(argns).items() if k != opt)
  127. # handle ``pygmentize -L``
  128. if argns.L is not None:
  129. if not is_only_option('L'):
  130. parser.print_help(sys.stderr)
  131. return 2
  132. # print version
  133. main(['', '-V'])
  134. allowed_types = {'lexer', 'formatter', 'filter', 'style'}
  135. largs = [arg.rstrip('s') for arg in argns.L]
  136. if any(arg not in allowed_types for arg in largs):
  137. parser.print_help(sys.stderr)
  138. return 0
  139. if not largs:
  140. largs = allowed_types
  141. for arg in largs:
  142. _print_list(arg)
  143. return 0
  144. # handle ``pygmentize -H``
  145. if argns.H:
  146. if not is_only_option('H'):
  147. parser.print_help(sys.stderr)
  148. return 2
  149. what, name = argns.H
  150. if what not in ('lexer', 'formatter', 'filter'):
  151. parser.print_help(sys.stderr)
  152. return 2
  153. return _print_help(what, name)
  154. # parse -O options
  155. parsed_opts = _parse_options(argns.O or [])
  156. # parse -P options
  157. for p_opt in argns.P or []:
  158. try:
  159. name, value = p_opt.split('=', 1)
  160. except ValueError:
  161. parsed_opts[p_opt] = True
  162. else:
  163. parsed_opts[name] = value
  164. # encodings
  165. inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
  166. outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
  167. # handle ``pygmentize -N``
  168. if argns.N:
  169. lexer = find_lexer_class_for_filename(argns.N)
  170. if lexer is None:
  171. lexer = TextLexer
  172. print(lexer.aliases[0])
  173. return 0
  174. # handle ``pygmentize -C``
  175. if argns.C:
  176. inp = sys.stdin.buffer.read()
  177. try:
  178. lexer = guess_lexer(inp, inencoding=inencoding)
  179. except ClassNotFound:
  180. lexer = TextLexer
  181. print(lexer.aliases[0])
  182. return 0
  183. # handle ``pygmentize -S``
  184. S_opt = argns.S
  185. a_opt = argns.a
  186. if S_opt is not None:
  187. f_opt = argns.f
  188. if not f_opt:
  189. parser.print_help(sys.stderr)
  190. return 2
  191. if argns.l or argns.INPUTFILE:
  192. parser.print_help(sys.stderr)
  193. return 2
  194. try:
  195. parsed_opts['style'] = S_opt
  196. fmter = get_formatter_by_name(f_opt, **parsed_opts)
  197. except ClassNotFound as err:
  198. print(err, file=sys.stderr)
  199. return 1
  200. print(fmter.get_style_defs(a_opt or ''))
  201. return 0
  202. # if no -S is given, -a is not allowed
  203. if argns.a is not None:
  204. parser.print_help(sys.stderr)
  205. return 2
  206. # parse -F options
  207. F_opts = _parse_filters(argns.F or [])
  208. # -x: allow custom (eXternal) lexers and formatters
  209. allow_custom_lexer_formatter = bool(argns.x)
  210. # select lexer
  211. lexer = None
  212. # given by name?
  213. lexername = argns.l
  214. if lexername:
  215. # custom lexer, located relative to user's cwd
  216. if allow_custom_lexer_formatter and '.py' in lexername:
  217. try:
  218. filename = None
  219. name = None
  220. if ':' in lexername:
  221. filename, name = lexername.rsplit(':', 1)
  222. if '.py' in name:
  223. # This can happen on Windows: If the lexername is
  224. # C:\lexer.py -- return to normal load path in that case
  225. name = None
  226. if filename and name:
  227. lexer = load_lexer_from_file(filename, name,
  228. **parsed_opts)
  229. else:
  230. lexer = load_lexer_from_file(lexername, **parsed_opts)
  231. except ClassNotFound as err:
  232. print('Error:', err, file=sys.stderr)
  233. return 1
  234. else:
  235. try:
  236. lexer = get_lexer_by_name(lexername, **parsed_opts)
  237. except (OptionError, ClassNotFound) as err:
  238. print('Error:', err, file=sys.stderr)
  239. return 1
  240. # read input code
  241. code = None
  242. if argns.INPUTFILE:
  243. if argns.s:
  244. print('Error: -s option not usable when input file specified',
  245. file=sys.stderr)
  246. return 2
  247. infn = argns.INPUTFILE
  248. try:
  249. with open(infn, 'rb') as infp:
  250. code = infp.read()
  251. except Exception as err:
  252. print('Error: cannot read infile:', err, file=sys.stderr)
  253. return 1
  254. if not inencoding:
  255. code, inencoding = guess_decode(code)
  256. # do we have to guess the lexer?
  257. if not lexer:
  258. try:
  259. lexer = get_lexer_for_filename(infn, code, **parsed_opts)
  260. except ClassNotFound as err:
  261. if argns.g:
  262. try:
  263. lexer = guess_lexer(code, **parsed_opts)
  264. except ClassNotFound:
  265. lexer = TextLexer(**parsed_opts)
  266. else:
  267. print('Error:', err, file=sys.stderr)
  268. return 1
  269. except OptionError as err:
  270. print('Error:', err, file=sys.stderr)
  271. return 1
  272. elif not argns.s: # treat stdin as full file (-s support is later)
  273. # read code from terminal, always in binary mode since we want to
  274. # decode ourselves and be tolerant with it
  275. code = sys.stdin.buffer.read() # use .buffer to get a binary stream
  276. if not inencoding:
  277. code, inencoding = guess_decode_from_terminal(code, sys.stdin)
  278. # else the lexer will do the decoding
  279. if not lexer:
  280. try:
  281. lexer = guess_lexer(code, **parsed_opts)
  282. except ClassNotFound:
  283. lexer = TextLexer(**parsed_opts)
  284. else: # -s option needs a lexer with -l
  285. if not lexer:
  286. print('Error: when using -s a lexer has to be selected with -l',
  287. file=sys.stderr)
  288. return 2
  289. # process filters
  290. for fname, fopts in F_opts:
  291. try:
  292. lexer.add_filter(fname, **fopts)
  293. except ClassNotFound as err:
  294. print('Error:', err, file=sys.stderr)
  295. return 1
  296. # select formatter
  297. outfn = argns.o
  298. fmter = argns.f
  299. if fmter:
  300. # custom formatter, located relative to user's cwd
  301. if allow_custom_lexer_formatter and '.py' in fmter:
  302. try:
  303. filename = None
  304. name = None
  305. if ':' in fmter:
  306. # Same logic as above for custom lexer
  307. filename, name = fmter.rsplit(':', 1)
  308. if '.py' in name:
  309. name = None
  310. if filename and name:
  311. fmter = load_formatter_from_file(filename, name,
  312. **parsed_opts)
  313. else:
  314. fmter = load_formatter_from_file(fmter, **parsed_opts)
  315. except ClassNotFound as err:
  316. print('Error:', err, file=sys.stderr)
  317. return 1
  318. else:
  319. try:
  320. fmter = get_formatter_by_name(fmter, **parsed_opts)
  321. except (OptionError, ClassNotFound) as err:
  322. print('Error:', err, file=sys.stderr)
  323. return 1
  324. if outfn:
  325. if not fmter:
  326. try:
  327. fmter = get_formatter_for_filename(outfn, **parsed_opts)
  328. except (OptionError, ClassNotFound) as err:
  329. print('Error:', err, file=sys.stderr)
  330. return 1
  331. try:
  332. outfile = open(outfn, 'wb')
  333. except Exception as err:
  334. print('Error: cannot open outfile:', err, file=sys.stderr)
  335. return 1
  336. else:
  337. if not fmter:
  338. if '256' in os.environ.get('TERM', ''):
  339. fmter = Terminal256Formatter(**parsed_opts)
  340. else:
  341. fmter = TerminalFormatter(**parsed_opts)
  342. outfile = sys.stdout.buffer
  343. # determine output encoding if not explicitly selected
  344. if not outencoding:
  345. if outfn:
  346. # output file? use lexer encoding for now (can still be None)
  347. fmter.encoding = inencoding
  348. else:
  349. # else use terminal encoding
  350. fmter.encoding = terminal_encoding(sys.stdout)
  351. # provide coloring under Windows, if possible
  352. if not outfn and sys.platform in ('win32', 'cygwin') and \
  353. fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
  354. # unfortunately colorama doesn't support binary streams on Py3
  355. outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
  356. fmter.encoding = None
  357. try:
  358. import colorama.initialise
  359. except ImportError:
  360. pass
  361. else:
  362. outfile = colorama.initialise.wrap_stream(
  363. outfile, convert=None, strip=None, autoreset=False, wrap=True)
  364. # When using the LaTeX formatter and the option `escapeinside` is
  365. # specified, we need a special lexer which collects escaped text
  366. # before running the chosen language lexer.
  367. escapeinside = parsed_opts.get('escapeinside', '')
  368. if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
  369. left = escapeinside[0]
  370. right = escapeinside[1]
  371. lexer = LatexEmbeddedLexer(left, right, lexer)
  372. # ... and do it!
  373. if not argns.s:
  374. # process whole input as per normal...
  375. try:
  376. highlight(code, lexer, fmter, outfile)
  377. finally:
  378. if outfn:
  379. outfile.close()
  380. return 0
  381. else:
  382. # line by line processing of stdin (eg: for 'tail -f')...
  383. try:
  384. while 1:
  385. line = sys.stdin.buffer.readline()
  386. if not line:
  387. break
  388. if not inencoding:
  389. line = guess_decode_from_terminal(line, sys.stdin)[0]
  390. highlight(line, lexer, fmter, outfile)
  391. if hasattr(outfile, 'flush'):
  392. outfile.flush()
  393. return 0
  394. except KeyboardInterrupt: # pragma: no cover
  395. return 0
  396. finally:
  397. if outfn:
  398. outfile.close()
  399. class HelpFormatter(argparse.HelpFormatter):
  400. def __init__(self, prog, indent_increment=2, max_help_position=16, width=None):
  401. if width is None:
  402. try:
  403. width = shutil.get_terminal_size().columns - 2
  404. except Exception:
  405. pass
  406. argparse.HelpFormatter.__init__(self, prog, indent_increment,
  407. max_help_position, width)
  408. def main(args=sys.argv):
  409. """
  410. Main command line entry point.
  411. """
  412. desc = "Highlight an input file and write the result to an output file."
  413. parser = argparse.ArgumentParser(description=desc, add_help=False,
  414. formatter_class=HelpFormatter)
  415. operation = parser.add_argument_group('Main operation')
  416. lexersel = operation.add_mutually_exclusive_group()
  417. lexersel.add_argument(
  418. '-l', metavar='LEXER',
  419. help='Specify the lexer to use. (Query names with -L.) If not '
  420. 'given and -g is not present, the lexer is guessed from the filename.')
  421. lexersel.add_argument(
  422. '-g', action='store_true',
  423. help='Guess the lexer from the file contents, or pass through '
  424. 'as plain text if nothing can be guessed.')
  425. operation.add_argument(
  426. '-F', metavar='FILTER[:options]', action='append',
  427. help='Add a filter to the token stream. (Query names with -L.) '
  428. 'Filter options are given after a colon if necessary.')
  429. operation.add_argument(
  430. '-f', metavar='FORMATTER',
  431. help='Specify the formatter to use. (Query names with -L.) '
  432. 'If not given, the formatter is guessed from the output filename, '
  433. 'and defaults to the terminal formatter if the output is to the '
  434. 'terminal or an unknown file extension.')
  435. operation.add_argument(
  436. '-O', metavar='OPTION=value[,OPTION=value,...]', action='append',
  437. help='Give options to the lexer and formatter as a comma-separated '
  438. 'list of key-value pairs. '
  439. 'Example: `-O bg=light,python=cool`.')
  440. operation.add_argument(
  441. '-P', metavar='OPTION=value', action='append',
  442. help='Give a single option to the lexer and formatter - with this '
  443. 'you can pass options whose value contains commas and equal signs. '
  444. 'Example: `-P "heading=Pygments, the Python highlighter"`.')
  445. operation.add_argument(
  446. '-o', metavar='OUTPUTFILE',
  447. help='Where to write the output. Defaults to standard output.')
  448. operation.add_argument(
  449. 'INPUTFILE', nargs='?',
  450. help='Where to read the input. Defaults to standard input.')
  451. flags = parser.add_argument_group('Operation flags')
  452. flags.add_argument(
  453. '-v', action='store_true',
  454. help='Print a detailed traceback on unhandled exceptions, which '
  455. 'is useful for debugging and bug reports.')
  456. flags.add_argument(
  457. '-s', action='store_true',
  458. help='Process lines one at a time until EOF, rather than waiting to '
  459. 'process the entire file. This only works for stdin, only for lexers '
  460. 'with no line-spanning constructs, and is intended for streaming '
  461. 'input such as you get from `tail -f`. '
  462. 'Example usage: `tail -f sql.log | pygmentize -s -l sql`.')
  463. flags.add_argument(
  464. '-x', action='store_true',
  465. help='Allow custom lexers and formatters to be loaded from a .py file '
  466. 'relative to the current working directory. For example, '
  467. '`-l ./customlexer.py -x`. By default, this option expects a file '
  468. 'with a class named CustomLexer or CustomFormatter; you can also '
  469. 'specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
  470. 'Users should be very careful not to use this option with untrusted '
  471. 'files, because it will import and run them.')
  472. special_modes_group = parser.add_argument_group(
  473. 'Special modes - do not do any highlighting')
  474. special_modes = special_modes_group.add_mutually_exclusive_group()
  475. special_modes.add_argument(
  476. '-S', metavar='STYLE -f formatter',
  477. help='Print style definitions for STYLE for a formatter '
  478. 'given with -f. The argument given by -a is formatter '
  479. 'dependent.')
  480. special_modes.add_argument(
  481. '-L', nargs='*', metavar='WHAT',
  482. help='List lexers, formatters, styles or filters -- '
  483. 'give additional arguments for the thing(s) you want to list '
  484. '(e.g. "styles"), or omit them to list everything.')
  485. special_modes.add_argument(
  486. '-N', metavar='FILENAME',
  487. help='Guess and print out a lexer name based solely on the given '
  488. 'filename. Does not take input or highlight anything. If no specific '
  489. 'lexer can be determined, "text" is printed.')
  490. special_modes.add_argument(
  491. '-C', action='store_true',
  492. help='Like -N, but print out a lexer name based solely on '
  493. 'a given content from standard input.')
  494. special_modes.add_argument(
  495. '-H', action='store', nargs=2, metavar=('NAME', 'TYPE'),
  496. help='Print detailed help for the object <name> of type <type>, '
  497. 'where <type> is one of "lexer", "formatter" or "filter".')
  498. special_modes.add_argument(
  499. '-V', action='store_true',
  500. help='Print the package version.')
  501. special_modes.add_argument(
  502. '-h', '--help', action='store_true',
  503. help='Print this help.')
  504. special_modes_group.add_argument(
  505. '-a', metavar='ARG',
  506. help='Formatter-specific additional argument for the -S (print '
  507. 'style sheet) mode.')
  508. argns = parser.parse_args(args[1:])
  509. try:
  510. return main_inner(parser, argns)
  511. except Exception:
  512. if argns.v:
  513. print(file=sys.stderr)
  514. print('*' * 65, file=sys.stderr)
  515. print('An unhandled exception occurred while highlighting.',
  516. file=sys.stderr)
  517. print('Please report the whole traceback to the issue tracker at',
  518. file=sys.stderr)
  519. print('<https://github.com/pygments/pygments/issues>.',
  520. file=sys.stderr)
  521. print('*' * 65, file=sys.stderr)
  522. print(file=sys.stderr)
  523. raise
  524. import traceback
  525. info = traceback.format_exception(*sys.exc_info())
  526. msg = info[-1].strip()
  527. if len(info) >= 3:
  528. # extract relevant file and position info
  529. msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
  530. print(file=sys.stderr)
  531. print('*** Error while highlighting:', file=sys.stderr)
  532. print(msg, file=sys.stderr)
  533. print('*** If this is a bug you want to report, please rerun with -v.',
  534. file=sys.stderr)
  535. return 1