Scanning.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. # cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False
  2. #
  3. # Cython Scanner
  4. #
  5. from __future__ import absolute_import
  6. import cython
  7. cython.declare(make_lexicon=object, lexicon=object,
  8. print_function=object, error=object, warning=object,
  9. os=object, platform=object)
  10. import os
  11. import platform
  12. from .. import Utils
  13. from ..Plex.Scanners import Scanner
  14. from ..Plex.Errors import UnrecognizedInput
  15. from .Errors import error, warning
  16. from .Lexicon import any_string_prefix, make_lexicon, IDENT
  17. from .Future import print_function
# Module-level scanner debugging switches.
# Only `trace_scanner` is read in this file (PyrexScanner.__init__ copies it
# to `self.trace`); the others are not referenced by live code here.
debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None
  22. lexicon = None
  23. def get_lexicon():
  24. global lexicon
  25. if not lexicon:
  26. lexicon = make_lexicon()
  27. return lexicon
  28. #------------------------------------------------------------------
  29. py_reserved_words = [
  30. "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
  31. "continue", "return", "raise", "import", "exec", "try",
  32. "except", "finally", "while", "if", "elif", "else", "for",
  33. "in", "assert", "and", "or", "not", "is", "lambda",
  34. "from", "yield", "with",
  35. ]
  36. pyx_reserved_words = py_reserved_words + [
  37. "include", "ctypedef", "cdef", "cpdef",
  38. "cimport", "DEF", "IF", "ELIF", "ELSE"
  39. ]
  40. class Method(object):
  41. def __init__(self, name, **kwargs):
  42. self.name = name
  43. self.kwargs = kwargs or None
  44. self.__name__ = name # for Plex tracing
  45. def __call__(self, stream, text):
  46. method = getattr(stream, self.name)
  47. # self.kwargs is almost always unused => avoid call overhead
  48. return method(text, **self.kwargs) if self.kwargs is not None else method(text)
  49. def __copy__(self):
  50. return self # immutable, no need to copy
  51. def __deepcopy__(self, memo):
  52. return self # immutable, no need to copy
  53. #------------------------------------------------------------------
  54. class CompileTimeScope(object):
  55. def __init__(self, outer=None):
  56. self.entries = {}
  57. self.outer = outer
  58. def declare(self, name, value):
  59. self.entries[name] = value
  60. def update(self, other):
  61. self.entries.update(other)
  62. def lookup_here(self, name):
  63. return self.entries[name]
  64. def __contains__(self, name):
  65. return name in self.entries
  66. def lookup(self, name):
  67. try:
  68. return self.lookup_here(name)
  69. except KeyError:
  70. outer = self.outer
  71. if outer:
  72. return outer.lookup(name)
  73. else:
  74. raise
  75. def initial_compile_time_env():
  76. benv = CompileTimeScope()
  77. names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
  78. for name, value in zip(names, platform.uname()):
  79. benv.declare(name, value)
  80. try:
  81. import __builtin__ as builtins
  82. except ImportError:
  83. import builtins
  84. names = (
  85. 'False', 'True',
  86. 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
  87. 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
  88. 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
  89. 'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
  90. 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
  91. 'sum', 'tuple', 'zip',
  92. ### defined below in a platform independent way
  93. # 'long', 'unicode', 'reduce', 'xrange'
  94. )
  95. for name in names:
  96. try:
  97. benv.declare(name, getattr(builtins, name))
  98. except AttributeError:
  99. # ignore, likely Py3
  100. pass
  101. # Py2/3 adaptations
  102. from functools import reduce
  103. benv.declare('reduce', reduce)
  104. benv.declare('unicode', getattr(builtins, 'unicode', getattr(builtins, 'str')))
  105. benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int')))
  106. benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range')))
  107. denv = CompileTimeScope(benv)
  108. return denv
  109. #------------------------------------------------------------------
  110. class SourceDescriptor(object):
  111. """
  112. A SourceDescriptor should be considered immutable.
  113. """
  114. filename = None
  115. _file_type = 'pyx'
  116. _escaped_description = None
  117. _cmp_name = ''
  118. def __str__(self):
  119. assert False # To catch all places where a descriptor is used directly as a filename
  120. def set_file_type_from_name(self, filename):
  121. name, ext = os.path.splitext(filename)
  122. self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
  123. def is_cython_file(self):
  124. return self._file_type in ('pyx', 'pxd')
  125. def is_python_file(self):
  126. return self._file_type == 'py'
  127. def get_escaped_description(self):
  128. if self._escaped_description is None:
  129. esc_desc = \
  130. self.get_description().encode('ASCII', 'replace').decode("ASCII")
  131. # Use forward slashes on Windows since these paths
  132. # will be used in the #line directives in the C/C++ files.
  133. self._escaped_description = esc_desc.replace('\\', '/')
  134. return self._escaped_description
  135. def __gt__(self, other):
  136. # this is only used to provide some sort of order
  137. try:
  138. return self._cmp_name > other._cmp_name
  139. except AttributeError:
  140. return False
  141. def __lt__(self, other):
  142. # this is only used to provide some sort of order
  143. try:
  144. return self._cmp_name < other._cmp_name
  145. except AttributeError:
  146. return False
  147. def __le__(self, other):
  148. # this is only used to provide some sort of order
  149. try:
  150. return self._cmp_name <= other._cmp_name
  151. except AttributeError:
  152. return False
  153. def __copy__(self):
  154. return self # immutable, no need to copy
  155. def __deepcopy__(self, memo):
  156. return self # immutable, no need to copy
  157. class FileSourceDescriptor(SourceDescriptor):
  158. """
  159. Represents a code source. A code source is a more generic abstraction
  160. for a "filename" (as sometimes the code doesn't come from a file).
  161. Instances of code sources are passed to Scanner.__init__ as the
  162. optional name argument and will be passed back when asking for
  163. the position()-tuple.
  164. """
  165. def __init__(self, filename, path_description=None):
  166. filename = Utils.decode_filename(filename)
  167. self.path_description = path_description or filename
  168. self.filename = filename
  169. # Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
  170. workdir = os.path.abspath('.') + os.sep
  171. self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
  172. self.set_file_type_from_name(filename)
  173. self._cmp_name = filename
  174. self._lines = {}
  175. def get_lines(self, encoding=None, error_handling=None):
  176. # we cache the lines only the second time this is called, in
  177. # order to save memory when they are only used once
  178. key = (encoding, error_handling)
  179. try:
  180. lines = self._lines[key]
  181. if lines is not None:
  182. return lines
  183. except KeyError:
  184. pass
  185. with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
  186. lines = list(f)
  187. if key in self._lines:
  188. self._lines[key] = lines
  189. else:
  190. # do not cache the first access, but remember that we
  191. # already read it once
  192. self._lines[key] = None
  193. return lines
  194. def get_description(self):
  195. try:
  196. return os.path.relpath(self.path_description)
  197. except ValueError:
  198. # path not under current directory => use complete file path
  199. return self.path_description
  200. def get_error_description(self):
  201. path = self.filename
  202. cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
  203. if path.startswith(cwd):
  204. return path[len(cwd):]
  205. return path
  206. def get_filenametable_entry(self):
  207. return self.file_path
  208. def __eq__(self, other):
  209. return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
  210. def __hash__(self):
  211. return hash(self.filename)
  212. def __repr__(self):
  213. return "<FileSourceDescriptor:%s>" % self.filename
  214. class StringSourceDescriptor(SourceDescriptor):
  215. """
  216. Instances of this class can be used instead of a filenames if the
  217. code originates from a string object.
  218. """
  219. def __init__(self, name, code):
  220. self.name = name
  221. #self.set_file_type_from_name(name)
  222. self.codelines = [x + "\n" for x in code.split("\n")]
  223. self._cmp_name = name
  224. def get_lines(self, encoding=None, error_handling=None):
  225. if not encoding:
  226. return self.codelines
  227. else:
  228. return [line.encode(encoding, error_handling).decode(encoding)
  229. for line in self.codelines]
  230. def get_description(self):
  231. return self.name
  232. get_error_description = get_description
  233. def get_filenametable_entry(self):
  234. return "stringsource"
  235. def __hash__(self):
  236. return id(self)
  237. # Do not hash on the name, an identical string source should be the
  238. # same object (name is often defaulted in other places)
  239. # return hash(self.name)
  240. def __eq__(self, other):
  241. return isinstance(other, StringSourceDescriptor) and self.name == other.name
  242. def __repr__(self):
  243. return "<StringSourceDescriptor:%s>" % self.name
  244. #------------------------------------------------------------------
class PyrexScanner(Scanner):
    """
    Token scanner for Cython/Python sources, built on the Plex Scanner.

    The current token is exposed as the pair (self.sy, self.systring);
    next() advances it.  Indentation is tracked with a stack of column
    widths and reported through 'INDENT' / 'DEDENT' tokens.
    """
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner=None,
                 scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
        """
        Set up the scanner and read the first token.

        `filename` must provide is_python_file() (it is a source
        descriptor, not a plain string).  When `parent_scanner` is given,
        the context, included files and compile-time state are shared with
        it; otherwise they are taken from `context` and `scope`, which
        must then not be None.
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)

        # The keyword set depends on whether this is a .py or a Cython file.
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)

        # Nesting counter maintained by enter_async() / exit_async().
        self.async_enabled = 0

        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
            if parent_scanner.async_enabled:
                self.enter_async()
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # User-supplied compile-time names override the defaults.
            if getattr(context.options, 'compile_time_env', None):
                self.compile_time_env.update(context.options.compile_time_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0

        # Start in the 'INDENT' state and load the first token.
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        """Produce a 'commentline' token, but only if parse_comments is set."""
        if self.parse_comments:
            self.produce('commentline', text)

    def strip_underscores(self, text, symbol):
        """Produce `symbol` with every '_' removed from `text`."""
        self.produce(symbol, text.replace('_', ''))

    def current_level(self):
        """Return the indentation width on top of the indentation stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        """Count an opening bracket and return `text` unchanged."""
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        """Count a closing bracket and return `text` unchanged."""
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        """Produce 'NEWLINE' and enter 'INDENT' state, unless inside brackets."""
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Maps an opening quote sequence to the scanner state used for its body.
    string_states = {
        "'": 'SQ_STRING',
        '"': 'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        """Strip any string prefix characters, then enter the matching string state."""
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        """Return to the default ('') state and produce 'END_STRING'."""
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        """Close the string as usual, then report "Unclosed string literal"."""
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """
        Handle the leading whitespace `text` of a line.

        Reports mixed tabs/spaces, then compares len(text) with the current
        level and produces one 'INDENT', zero or more 'DEDENT's, or nothing.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            #print "Scanner.indentation_action: indent with", repr(c) ###
            if self.indentation_char is None:
                # The first indented line fixes the indentation character.
                self.indentation_char = c
                #print "Scanner.indentation_action: setting indent_char to", repr(c)
            else:
                if self.indentation_char != c:
                    self.error("Mixed use of tabs and spaces")
            # Anything left after removing `c` means a second character
            # was used within this line's indentation.
            if text.replace(c, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        #print "Changing indent level from", current_level, "to", new_level ###
        if new_level == current_level:
            return
        elif new_level > current_level:
            #print "...pushing level", new_level ###
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            while new_level < self.current_level():
                #print "...popping level", self.indentation_stack[-1] ###
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            #print "...current level now", self.current_level() ###
            # After popping, the new level must match an enclosing level exactly.
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        """Produce a 'DEDENT' for every open indentation level, then 'EOF'."""
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """
        Read the next token into self.sy / self.systring.

        An IDENT whose text is in self.keywords becomes a keyword token
        (sy is set to the text).  'print' and 'exec' are instead dropped
        from the keyword set when the print_function future directive is
        active, respectively when language_level >= 3.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
            return  # just a marker, error() always raises
        if sy == IDENT:
            if systring in self.keywords:
                if systring == u'print' and print_function in self.context.future_directives:
                    self.keywords.discard('print')
                elif systring == u'exec' and self.context.language_level >= 3:
                    self.keywords.discard('exec')
                else:
                    sy = systring
            systring = self.context.intern_ustring(systring)
        self.sy = sy
        self.systring = systring
        # Debug output, disabled by the constant condition.
        if False:  # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) of the following token without consuming it."""
        saved = self.sy, self.systring
        self.next()
        next = self.sy, self.systring
        # Push the token back and restore the current one.
        self.unread(*next)
        self.sy, self.systring = saved
        return next

    def put_back(self, sy, systring):
        """Make (sy, systring) the current token; the old current token is re-queued."""
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        """Insert (token, value) at the front of self.queue."""
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos=None, fatal=True):
        """
        Report `message` at `pos` (default: the current position).

        If the current token is 'INDENT', an additional "Possible
        inconsistent indentation" error is reported first.  The object
        returned by the module-level error() is raised unless `fatal` is
        false.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal: raise err

    def expect(self, what, message=None):
        """Consume the current token if it is `what`, otherwise report an error."""
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message=None):
        """Consume the current token if it is an IDENT spelled `what`, otherwise report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message=None):
        """Report `message`, or a default "Expected ..., found ..." error."""
        if message:
            self.error(message)
        else:
            # For identifiers, show the identifier text rather than the token type.
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT', "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT', "Expected a decrease in indentation level")

    def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
        """
        Expect a 'NEWLINE' token unless the current token is 'EOF'.

        With `ignore_semicolon`, a ';' before it is skipped and a
        "useless trailing semicolon" warning is issued at its position.
        """
        # Expect either a newline or end of file
        useless_trailing_semicolon = None
        if ignore_semicolon and self.sy == ';':
            useless_trailing_semicolon = self.position()
            self.next()
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)
        if useless_trailing_semicolon is not None:
            warning(useless_trailing_semicolon, "useless trailing semicolon")

    def enter_async(self):
        """Increment the async nesting counter; on 0 -> 1 add 'async'/'await' as keywords."""
        self.async_enabled += 1
        if self.async_enabled == 1:
            self.keywords.add('async')
            self.keywords.add('await')

    def exit_async(self):
        """Decrement the async nesting counter; at 0 drop 'async'/'await' from the keywords."""
        assert self.async_enabled > 0
        self.async_enabled -= 1
        if not self.async_enabled:
            self.keywords.discard('await')
            self.keywords.discard('async')
            # The current token was scanned while these were still keywords:
            # turn it back into a plain identifier.
            if self.sy in ('async', 'await'):
                self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy)