123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320 |
- """
- pygments.lexers.php
- ~~~~~~~~~~~~~~~~~~~
- Lexers for PHP and related languages.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import Lexer, RegexLexer, include, bygroups, default, \
- using, this, words, do_insertions
- from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Other, Generic
- from pygments.util import get_bool_opt, get_list_opt, shebang_matches
- __all__ = ['ZephirLexer', 'PsyshConsoleLexer', 'PhpLexer']
# Matches one physical line *including* its terminating newline.  A trailing
# fragment that is not newline-terminated is not matched.
line_re = re.compile('.*?\n')
class ZephirLexer(RegexLexer):
    """
    Lexer for `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    # Public word lists; the token table below does not reference them.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    # The long word-list patterns are kept out of the token table so that the
    # table itself stays readable.  Rule order in 'root' decides precedence
    # when a word appears in more than one list.
    _operator_re = (
        r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
        r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?'
    )
    _keyword_re = (
        r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
        r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
        r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
        r'empty)\b'
    )
    _declaration_re = r'(var|let|with|function)\b'
    _reserved_re = (
        r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
        r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
        r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
        r'transient|volatile)\b'
    )
    _constant_re = r'(true|false|null|undefined)\b'
    _builtin_re = (
        r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
        r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
        r'window)\b'
    )

    tokens = {
        # Whitespace plus both comment forms; shared by the states below.
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        # Entered after tokens that may be followed by a regex literal: try a
        # full ``/.../flags`` literal first, then a bare ``/`` operator, and
        # otherwise leave the state without consuming anything.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'/', Operator, '#pop'),
            default('#pop')
        ],
        # No rule in this table enters this state.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # Zero-width match at a line start that is followed by whitespace
            # or a slash.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (_operator_re, Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (_keyword_re, Keyword, 'slashstartsregex'),
            (_declaration_re, Keyword.Declaration, 'slashstartsregex'),
            (_reserved_re, Keyword.Reserved),
            (_constant_re, Keyword.Constant),
            (_builtin_re, Name.Builtin),
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            # Numbers: float before hex before plain integer.
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # Quoted strings with backslash escapes.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
class PsyshConsoleLexer(Lexer):
    """
    For `PsySH`_ console output, such as:

    .. sourcecode:: psysh

        >>> $greeting = function($name): string {
        ...     return "Hello, {$name}";
        ... };
        => Closure($name): string {#2371 …3}
        >>> $greeting('World')
        => "Hello, World"

    .. _PsySH: https://psysh.org/
    .. versionadded:: 2.7
    """

    name = 'PsySH console session for PHP'
    aliases = ['psysh']

    def __init__(self, **options):
        """Create the lexer, forcing ``startinline`` on.

        The options are later forwarded to `PhpLexer`, whose ``startinline``
        option makes it start in PHP code without an opening ``<?php`` tag.
        """
        options['startinline'] = True
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, tokentype, value)`` tuples for a console session.

        Lines starting with ``>>> `` or ``... `` (or consisting only of
        ``...``) are input: the prompt becomes `Generic.Prompt` and the rest
        is accumulated and lexed as PHP.  Every other line is emitted as
        `Generic.Output`, after any accumulated code has been flushed.
        """
        php = PhpLexer(**self.options)
        code = ''       # PHP source gathered from consecutive input lines
        prompts = []    # (offset into ``code``, prompt tokens) pairs

        for found in line_re.finditer(text):
            line = found.group()

            # Width of the prompt prefix; 0 means "not an input line".
            if line.startswith(('>>> ', '... ')):
                prompt_width = 4
            elif line.rstrip() == '...':
                prompt_width = 3
            else:
                prompt_width = 0

            if prompt_width:
                prompts.append(
                    (len(code), [(0, Generic.Prompt, line[:prompt_width])]))
                code += line[prompt_width:]
                continue

            # Output line: emit the pending code block first.
            if code:
                yield from do_insertions(
                    prompts, php.get_tokens_unprocessed(code))
                code = ''
                prompts = []
            yield found.start(), Generic.Output, line

        # Session ended while still inside an input block.
        if code:
            yield from do_insertions(
                prompts, php.get_tokens_unprocessed(code))
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns
    # PHP uses a backslash as a namespace separator
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        # Outside PHP tags: everything is `Other` until an opening tag.
        'root': [
            (r'<\?(php)?', Comment.Preproc, 'php'),
            (r'[^<]+', Other),
            (r'<', Other)
        ],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            # heredoc / nowdoc: the closing label (\3) must repeat the
            # opening one, and the optional quote (\2) must be balanced
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter,
                      Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # anonymous function: no name follows, so stay in this state
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally)\b', Keyword),
            # NOTE(review): with re.IGNORECASE the rule above already matches
            # true/false/null (via TRUE|FALSE|NULL), so this rule looks
            # shadowed.  Left untouched to keep the emitted token types stable.
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            # Binary literals must be tried before the generic integer rule:
            # otherwise ``\d+`` consumes the leading ``0`` of ``0b101`` and
            # this rule can never match.
            (r'0b[01]+', Number.Bin),
            (r'\d+', Number.Integer),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
                '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
                '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
                '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
                suffix=r'\b'),
             Name.Constant),
        ],
        'classname': [
            (_ident_inner, Name.Class, '#pop')
        ],
        'functionname': [
            include('magicfuncs'),
            (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        # Inside a double-quoted string.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'[^{$"\\]+', String.Double),
            (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
            # simple interpolation: $var, $var[...], $var->prop
            (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
             String.Interpol),
            # complex interpolation: the expression is lexed recursively by
            # this lexer, started directly in PHP mode
            (r'(\{\$\{)(.*?)(\}\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\{)(\$.*?)(\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\$\{)(\S+)(\})',
             bygroups(String.Interpol, Name.Variable, String.Interpol)),
            # stray $, { or backslash that starts none of the above
            (r'[${\\]', String.Double)
        ],
    }

    def __init__(self, **options):
        """Read the lexer options and build the set of builtin names.

        Recognised options are ``funcnamehighlighting``, ``disabledmodules``
        and ``startinline`` (see the class docstring), plus the private
        ``_startinline`` used for recursive lexing of string interpolations.
        """
        self.funcnamehighlighting = get_bool_opt(
            options, 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(
            options, 'disabledmodules', ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself; popped so that it is
        # not passed on to the base class
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            # imported lazily, only when builtin highlighting is enabled
            from pygments.lexers._php_builtins import MODULES
            for key, value in MODULES.items():
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, tokentype, value)`` tuples for *text*.

        Lexing starts in the ``root`` state, or directly in ``php`` when
        ``startinline`` is set.  `Name.Other` tokens whose value is a known
        builtin function name are re-tagged as `Name.Builtin`.
        """
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other and value in self._functions:
                token = Name.Builtin
            yield index, token, value

    # NOTE: declared without ``self`` as in the original; presumably the
    # Pygments lexer metaclass wraps it as a static analyser - confirm
    # against the Pygments API docs.
    def analyse_text(text):
        """Score how likely *text* is PHP.

        Returns ``True`` when the shebang line mentions ``php``; otherwise
        0.3 if the text contains a ``<?`` that is not followed by ``xml``,
        else 0.0.
        """
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
|