123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511 |
- """
- pygments.formatters.latex
- ~~~~~~~~~~~~~~~~~~~~~~~~~
- Formatter for LaTeX fancyvrb output.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- from io import StringIO
- from pygments.formatter import Formatter
- from pygments.lexer import Lexer, do_insertions
- from pygments.token import Token, STANDARD_TYPES
- from pygments.util import get_bool_opt, get_int_opt
- __all__ = ['LatexFormatter']
- def escape_tex(text, commandprefix):
- return text.replace('\\', '\x00'). \
- replace('{', '\x01'). \
- replace('}', '\x02'). \
- replace('\x00', r'\%sZbs{}' % commandprefix). \
- replace('\x01', r'\%sZob{}' % commandprefix). \
- replace('\x02', r'\%sZcb{}' % commandprefix). \
- replace('^', r'\%sZca{}' % commandprefix). \
- replace('_', r'\%sZus{}' % commandprefix). \
- replace('&', r'\%sZam{}' % commandprefix). \
- replace('<', r'\%sZlt{}' % commandprefix). \
- replace('>', r'\%sZgt{}' % commandprefix). \
- replace('#', r'\%sZsh{}' % commandprefix). \
- replace('%', r'\%sZpc{}' % commandprefix). \
- replace('$', r'\%sZdl{}' % commandprefix). \
- replace('-', r'\%sZhy{}' % commandprefix). \
- replace("'", r'\%sZsq{}' % commandprefix). \
- replace('"', r'\%sZdq{}' % commandprefix). \
- replace('~', r'\%sZti{}' % commandprefix)
- DOC_TEMPLATE = r'''
- \documentclass{%(docclass)s}
- \usepackage{fancyvrb}
- \usepackage{color}
- \usepackage[%(encoding)s]{inputenc}
- %(preamble)s
- %(styledefs)s
- \begin{document}
- \section*{%(title)s}
- %(code)s
- \end{document}
- '''
- ## Small explanation of the mess below :)
- #
- # The previous version of the LaTeX formatter just assigned a command to
- # each token type defined in the current style. That obviously is
- # problematic if the highlighted code is produced for a different style
- # than the style commands themselves.
- #
- # This version works much like the HTML formatter which assigns multiple
- # CSS classes to each <span> tag, from the most specific to the least
- # specific token type, thus falling back to the parent token type if one
- # is not defined. Here, the classes are there too and use the same short
- # forms given in token.STANDARD_TYPES.
- #
- # Highlighted code now only uses one custom command, which by default is
- # \PY and selectable by the commandprefix option (and in addition the
- # escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
- # backwards compatibility purposes).
- #
- # \PY has two arguments: the classes, separated by +, and the text to
- # render in that style. The classes are resolved into the respective
- # style commands by magic, which serves to ignore unknown classes.
- #
- # The magic macros are:
- # * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
- # to render in \PY@do. Their definition determines the style.
- # * \PY@reset resets \PY@it etc. to do nothing.
- # * \PY@toks parses the list of classes, using magic inspired by the
- # keyval package (but modified to use plusses instead of commas
- # because fancyvrb redefines commas inside its environments).
- # * \PY@tok processes one class, calling the \PY@tok@classname command
- # if it exists.
- # * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
- # for its class.
- # * \PY resets the style, parses the classnames and then calls \PY@do.
- #
- # Tip: to read this code, print it out in substituted form using e.g.
- # >>> print STYLE_TEMPLATE % {'cp': 'PY'}
- STYLE_TEMPLATE = r'''
- \makeatletter
- \def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
- \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
- \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
- \def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
- \def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
- \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
- \def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
- \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
- \def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}
- %(styles)s
- \def\%(cp)sZbs{\char`\\}
- \def\%(cp)sZus{\char`\_}
- \def\%(cp)sZob{\char`\{}
- \def\%(cp)sZcb{\char`\}}
- \def\%(cp)sZca{\char`\^}
- \def\%(cp)sZam{\char`\&}
- \def\%(cp)sZlt{\char`\<}
- \def\%(cp)sZgt{\char`\>}
- \def\%(cp)sZsh{\char`\#}
- \def\%(cp)sZpc{\char`\%%}
- \def\%(cp)sZdl{\char`\$}
- \def\%(cp)sZhy{\char`\-}
- \def\%(cp)sZsq{\char`\'}
- \def\%(cp)sZdq{\char`\"}
- \def\%(cp)sZti{\char`\~}
- %% for compatibility with earlier versions
- \def\%(cp)sZat{@}
- \def\%(cp)sZlb{[}
- \def\%(cp)sZrb{]}
- \makeatother
- '''
- def _get_ttype_name(ttype):
- fname = STANDARD_TYPES.get(ttype)
- if fname:
- return fname
- aname = ''
- while fname is None:
- aname = ttype[-1] + aname
- ttype = ttype.parent
- fname = STANDARD_TYPES.get(ttype)
- return fname + aname
- class LatexFormatter(Formatter):
- r"""
- Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
- standard packages.
- Without the `full` option, code is formatted as one ``Verbatim``
- environment, like this:
- .. sourcecode:: latex
- \begin{Verbatim}[commandchars=\\\{\}]
- \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
- \PY{k}{pass}
- \end{Verbatim}
- The special command used here (``\PY``) and all the other macros it needs
- are output by the `get_style_defs` method.
- With the `full` option, a complete LaTeX document is output, including
- the command definitions in the preamble.
- The `get_style_defs()` method of a `LatexFormatter` returns a string
- containing ``\def`` commands defining the macros needed inside the
- ``Verbatim`` environments.
- Additional options accepted:
- `style`
- The style to use, can be a string or a Style subclass (default:
- ``'default'``).
- `full`
- Tells the formatter to output a "full" document, i.e. a complete
- self-contained document (default: ``False``).
- `title`
- If `full` is true, the title that should be used to caption the
- document (default: ``''``).
- `docclass`
- If the `full` option is enabled, this is the document class to use
- (default: ``'article'``).
- `preamble`
- If the `full` option is enabled, this can be further preamble commands,
- e.g. ``\usepackage`` (default: ``''``).
- `linenos`
- If set to ``True``, output line numbers (default: ``False``).
- `linenostart`
- The line number for the first line (default: ``1``).
- `linenostep`
- If set to a number n > 1, only every nth line number is printed.
- `verboptions`
- Additional options given to the Verbatim environment (see the *fancyvrb*
- docs for possible values) (default: ``''``).
- `commandprefix`
- The LaTeX commands used to produce colored output are constructed
- using this prefix and some letters (default: ``'PY'``).
- .. versionadded:: 0.7
- .. versionchanged:: 0.10
- The default is now ``'PY'`` instead of ``'C'``.
- `texcomments`
- If set to ``True``, enables LaTeX comment lines. That is, LaTex markup
- in comment tokens is not escaped so that LaTeX can render it (default:
- ``False``).
- .. versionadded:: 1.2
- `mathescape`
- If set to ``True``, enables LaTeX math mode escape in comments. That
- is, ``'$...$'`` inside a comment will trigger math mode (default:
- ``False``).
- .. versionadded:: 1.2
- `escapeinside`
- If set to a string of length 2, enables escaping to LaTeX. Text
- delimited by these 2 characters is read as LaTeX code and
- typeset accordingly. It has no effect in string literals. It has
- no effect in comments if `texcomments` or `mathescape` is
- set. (default: ``''``).
- .. versionadded:: 2.0
- `envname`
- Allows you to pick an alternative environment name replacing Verbatim.
- The alternate environment still has to support Verbatim's option syntax.
- (default: ``'Verbatim'``).
- .. versionadded:: 2.0
- """
- name = 'LaTeX'
- aliases = ['latex', 'tex']
- filenames = ['*.tex']
- def __init__(self, **options):
- Formatter.__init__(self, **options)
- self.docclass = options.get('docclass', 'article')
- self.preamble = options.get('preamble', '')
- self.linenos = get_bool_opt(options, 'linenos', False)
- self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
- self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
- self.verboptions = options.get('verboptions', '')
- self.nobackground = get_bool_opt(options, 'nobackground', False)
- self.commandprefix = options.get('commandprefix', 'PY')
- self.texcomments = get_bool_opt(options, 'texcomments', False)
- self.mathescape = get_bool_opt(options, 'mathescape', False)
- self.escapeinside = options.get('escapeinside', '')
- if len(self.escapeinside) == 2:
- self.left = self.escapeinside[0]
- self.right = self.escapeinside[1]
- else:
- self.escapeinside = ''
- self.envname = options.get('envname', 'Verbatim')
- self._create_stylesheet()
- def _create_stylesheet(self):
- t2n = self.ttype2name = {Token: ''}
- c2d = self.cmd2def = {}
- cp = self.commandprefix
- def rgbcolor(col):
- if col:
- return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
- for i in (0, 2, 4)])
- else:
- return '1,1,1'
- for ttype, ndef in self.style:
- name = _get_ttype_name(ttype)
- cmndef = ''
- if ndef['bold']:
- cmndef += r'\let\$$@bf=\textbf'
- if ndef['italic']:
- cmndef += r'\let\$$@it=\textit'
- if ndef['underline']:
- cmndef += r'\let\$$@ul=\underline'
- if ndef['roman']:
- cmndef += r'\let\$$@ff=\textrm'
- if ndef['sans']:
- cmndef += r'\let\$$@ff=\textsf'
- if ndef['mono']:
- cmndef += r'\let\$$@ff=\textsf'
- if ndef['color']:
- cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
- rgbcolor(ndef['color']))
- if ndef['border']:
- cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
- r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
- (rgbcolor(ndef['border']),
- rgbcolor(ndef['bgcolor'])))
- elif ndef['bgcolor']:
- cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
- r'\colorbox[rgb]{%s}{\strut ##1}}}' %
- rgbcolor(ndef['bgcolor']))
- if cmndef == '':
- continue
- cmndef = cmndef.replace('$$', cp)
- t2n[ttype] = name
- c2d[name] = cmndef
- def get_style_defs(self, arg=''):
- """
- Return the command sequences needed to define the commands
- used to format text in the verbatim environment. ``arg`` is ignored.
- """
- cp = self.commandprefix
- styles = []
- for name, definition in self.cmd2def.items():
- styles.append(r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition))
- return STYLE_TEMPLATE % {'cp': self.commandprefix,
- 'styles': '\n'.join(styles)}
- def format_unencoded(self, tokensource, outfile):
- # TODO: add support for background colors
- t2n = self.ttype2name
- cp = self.commandprefix
- if self.full:
- realoutfile = outfile
- outfile = StringIO()
- outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
- if self.linenos:
- start, step = self.linenostart, self.linenostep
- outfile.write(',numbers=left' +
- (start and ',firstnumber=%d' % start or '') +
- (step and ',stepnumber=%d' % step or ''))
- if self.mathescape or self.texcomments or self.escapeinside:
- outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
- '\\catcode`\\_=8\\relax}')
- if self.verboptions:
- outfile.write(',' + self.verboptions)
- outfile.write(']\n')
- for ttype, value in tokensource:
- if ttype in Token.Comment:
- if self.texcomments:
- # Try to guess comment starting lexeme and escape it ...
- start = value[0:1]
- for i in range(1, len(value)):
- if start[0] != value[i]:
- break
- start += value[i]
- value = value[len(start):]
- start = escape_tex(start, cp)
- # ... but do not escape inside comment.
- value = start + value
- elif self.mathescape:
- # Only escape parts not inside a math environment.
- parts = value.split('$')
- in_math = False
- for i, part in enumerate(parts):
- if not in_math:
- parts[i] = escape_tex(part, cp)
- in_math = not in_math
- value = '$'.join(parts)
- elif self.escapeinside:
- text = value
- value = ''
- while text:
- a, sep1, text = text.partition(self.left)
- if sep1:
- b, sep2, text = text.partition(self.right)
- if sep2:
- value += escape_tex(a, cp) + b
- else:
- value += escape_tex(a + sep1 + b, cp)
- else:
- value += escape_tex(a, cp)
- else:
- value = escape_tex(value, cp)
- elif ttype not in Token.Escape:
- value = escape_tex(value, cp)
- styles = []
- while ttype is not Token:
- try:
- styles.append(t2n[ttype])
- except KeyError:
- # not in current style
- styles.append(_get_ttype_name(ttype))
- ttype = ttype.parent
- styleval = '+'.join(reversed(styles))
- if styleval:
- spl = value.split('\n')
- for line in spl[:-1]:
- if line:
- outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
- outfile.write('\n')
- if spl[-1]:
- outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
- else:
- outfile.write(value)
- outfile.write('\\end{' + self.envname + '}\n')
- if self.full:
- encoding = self.encoding or 'utf8'
- # map known existings encodings from LaTeX distribution
- encoding = {
- 'utf_8': 'utf8',
- 'latin_1': 'latin1',
- 'iso_8859_1': 'latin1',
- }.get(encoding.replace('-', '_'), encoding)
- realoutfile.write(DOC_TEMPLATE %
- dict(docclass = self.docclass,
- preamble = self.preamble,
- title = self.title,
- encoding = encoding,
- styledefs = self.get_style_defs(),
- code = outfile.getvalue()))
- class LatexEmbeddedLexer(Lexer):
- """
- This lexer takes one lexer as argument, the lexer for the language
- being formatted, and the left and right delimiters for escaped text.
- First everything is scanned using the language lexer to obtain
- strings and comments. All other consecutive tokens are merged and
- the resulting text is scanned for escaped segments, which are given
- the Token.Escape type. Finally text that is not escaped is scanned
- again with the language lexer.
- """
- def __init__(self, left, right, lang, **options):
- self.left = left
- self.right = right
- self.lang = lang
- Lexer.__init__(self, **options)
- def get_tokens_unprocessed(self, text):
- # find and remove all the escape tokens (replace with an empty string)
- # this is very similar to DelegatingLexer.get_tokens_unprocessed.
- buffered = ''
- insertions = []
- insertion_buf = []
- for i, t, v in self._find_safe_escape_tokens(text):
- if t is None:
- if insertion_buf:
- insertions.append((len(buffered), insertion_buf))
- insertion_buf = []
- buffered += v
- else:
- insertion_buf.append((i, t, v))
- if insertion_buf:
- insertions.append((len(buffered), insertion_buf))
- return do_insertions(insertions,
- self.lang.get_tokens_unprocessed(buffered))
- def _find_safe_escape_tokens(self, text):
- """ find escape tokens that are not in strings or comments """
- for i, t, v in self._filter_to(
- self.lang.get_tokens_unprocessed(text),
- lambda t: t in Token.Comment or t in Token.String
- ):
- if t is None:
- for i2, t2, v2 in self._find_escape_tokens(v):
- yield i + i2, t2, v2
- else:
- yield i, None, v
- def _filter_to(self, it, pred):
- """ Keep only the tokens that match `pred`, merge the others together """
- buf = ''
- idx = 0
- for i, t, v in it:
- if pred(t):
- if buf:
- yield idx, None, buf
- buf = ''
- yield i, t, v
- else:
- if not buf:
- idx = i
- buf += v
- if buf:
- yield idx, None, buf
- def _find_escape_tokens(self, text):
- """ Find escape tokens within text, give token=None otherwise """
- index = 0
- while text:
- a, sep1, text = text.partition(self.left)
- if a:
- yield index, None, a
- index += len(a)
- if sep1:
- b, sep2, text = text.partition(self.right)
- if sep2:
- yield index + len(sep1), Token.Escape, b
- index += len(sep1) + len(b) + len(sep2)
- else:
- yield index, Token.Error, sep1
- index += len(sep1)
- text = b
|