robotframework.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. """
  2. pygments.lexers.robotframework
  3. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for Robot Framework.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. # Copyright 2012 Nokia Siemens Networks Oyj
  9. #
  10. # Licensed under the Apache License, Version 2.0 (the "License");
  11. # you may not use this file except in compliance with the License.
  12. # You may obtain a copy of the License at
  13. #
  14. # http://www.apache.org/licenses/LICENSE-2.0
  15. #
  16. # Unless required by applicable law or agreed to in writing, software
  17. # distributed under the License is distributed on an "AS IS" BASIS,
  18. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. # See the License for the specific language governing permissions and
  20. # limitations under the License.
  21. import re
  22. from pygments.lexer import Lexer
  23. from pygments.token import Token
  24. __all__ = ['RobotFrameworkLexer']
  25. HEADING = Token.Generic.Heading
  26. SETTING = Token.Keyword.Namespace
  27. IMPORT = Token.Name.Namespace
  28. TC_KW_NAME = Token.Generic.Subheading
  29. KEYWORD = Token.Name.Function
  30. ARGUMENT = Token.String
  31. VARIABLE = Token.Name.Variable
  32. COMMENT = Token.Comment
  33. SEPARATOR = Token.Punctuation
  34. SYNTAX = Token.Punctuation
  35. GHERKIN = Token.Generic.Emph
  36. ERROR = Token.Error
  37. def normalize(string, remove=''):
  38. string = string.lower()
  39. for char in remove + ' ':
  40. if char in string:
  41. string = string.replace(char, '')
  42. return string
  43. class RobotFrameworkLexer(Lexer):
  44. """
  45. For `Robot Framework <http://robotframework.org>`_ test data.
  46. Supports both space and pipe separated plain text formats.
  47. .. versionadded:: 1.6
  48. """
  49. name = 'RobotFramework'
  50. aliases = ['robotframework']
  51. filenames = ['*.robot']
  52. mimetypes = ['text/x-robotframework']
  53. def __init__(self, **options):
  54. options['tabsize'] = 2
  55. options['encoding'] = 'UTF-8'
  56. Lexer.__init__(self, **options)
  57. def get_tokens_unprocessed(self, text):
  58. row_tokenizer = RowTokenizer()
  59. var_tokenizer = VariableTokenizer()
  60. index = 0
  61. for row in text.splitlines():
  62. for value, token in row_tokenizer.tokenize(row):
  63. for value, token in var_tokenizer.tokenize(value, token):
  64. if value:
  65. yield index, token, str(value)
  66. index += len(value)
  67. class VariableTokenizer:
  68. def tokenize(self, string, token):
  69. var = VariableSplitter(string, identifiers='$@%&')
  70. if var.start < 0 or token in (COMMENT, ERROR):
  71. yield string, token
  72. return
  73. for value, token in self._tokenize(var, string, token):
  74. if value:
  75. yield value, token
  76. def _tokenize(self, var, string, orig_token):
  77. before = string[:var.start]
  78. yield before, orig_token
  79. yield var.identifier + '{', SYNTAX
  80. yield from self.tokenize(var.base, VARIABLE)
  81. yield '}', SYNTAX
  82. if var.index:
  83. yield '[', SYNTAX
  84. yield from self.tokenize(var.index, VARIABLE)
  85. yield ']', SYNTAX
  86. yield from self.tokenize(string[var.end:], orig_token)
  87. class RowTokenizer:
  88. def __init__(self):
  89. self._table = UnknownTable()
  90. self._splitter = RowSplitter()
  91. testcases = TestCaseTable()
  92. settings = SettingTable(testcases.set_default_template)
  93. variables = VariableTable()
  94. keywords = KeywordTable()
  95. self._tables = {'settings': settings, 'setting': settings,
  96. 'metadata': settings,
  97. 'variables': variables, 'variable': variables,
  98. 'testcases': testcases, 'testcase': testcases,
  99. 'tasks': testcases, 'task': testcases,
  100. 'keywords': keywords, 'keyword': keywords,
  101. 'userkeywords': keywords, 'userkeyword': keywords}
  102. def tokenize(self, row):
  103. commented = False
  104. heading = False
  105. for index, value in enumerate(self._splitter.split(row)):
  106. # First value, and every second after that, is a separator.
  107. index, separator = divmod(index-1, 2)
  108. if value.startswith('#'):
  109. commented = True
  110. elif index == 0 and value.startswith('*'):
  111. self._table = self._start_table(value)
  112. heading = True
  113. yield from self._tokenize(value, index, commented,
  114. separator, heading)
  115. self._table.end_row()
  116. def _start_table(self, header):
  117. name = normalize(header, remove='*')
  118. return self._tables.get(name, UnknownTable())
  119. def _tokenize(self, value, index, commented, separator, heading):
  120. if commented:
  121. yield value, COMMENT
  122. elif separator:
  123. yield value, SEPARATOR
  124. elif heading:
  125. yield value, HEADING
  126. else:
  127. yield from self._table.tokenize(value, index)
  128. class RowSplitter:
  129. _space_splitter = re.compile('( {2,})')
  130. _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')
  131. def split(self, row):
  132. splitter = (row.startswith('| ') and self._split_from_pipes
  133. or self._split_from_spaces)
  134. yield from splitter(row)
  135. yield '\n'
  136. def _split_from_spaces(self, row):
  137. yield '' # Start with (pseudo)separator similarly as with pipes
  138. yield from self._space_splitter.split(row)
  139. def _split_from_pipes(self, row):
  140. _, separator, rest = self._pipe_splitter.split(row, 1)
  141. yield separator
  142. while self._pipe_splitter.search(rest):
  143. cell, separator, rest = self._pipe_splitter.split(rest, 1)
  144. yield cell
  145. yield separator
  146. yield rest
  147. class Tokenizer:
  148. _tokens = None
  149. def __init__(self):
  150. self._index = 0
  151. def tokenize(self, value):
  152. values_and_tokens = self._tokenize(value, self._index)
  153. self._index += 1
  154. if isinstance(values_and_tokens, type(Token)):
  155. values_and_tokens = [(value, values_and_tokens)]
  156. return values_and_tokens
  157. def _tokenize(self, value, index):
  158. index = min(index, len(self._tokens) - 1)
  159. return self._tokens[index]
  160. def _is_assign(self, value):
  161. if value.endswith('='):
  162. value = value[:-1].strip()
  163. var = VariableSplitter(value, identifiers='$@&')
  164. return var.start == 0 and var.end == len(value)
  165. class Comment(Tokenizer):
  166. _tokens = (COMMENT,)
  167. class Setting(Tokenizer):
  168. _tokens = (SETTING, ARGUMENT)
  169. _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
  170. 'suitepostcondition', 'testsetup', 'tasksetup', 'testprecondition',
  171. 'testteardown','taskteardown', 'testpostcondition', 'testtemplate', 'tasktemplate')
  172. _import_settings = ('library', 'resource', 'variables')
  173. _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
  174. 'testtimeout','tasktimeout')
  175. _custom_tokenizer = None
  176. def __init__(self, template_setter=None):
  177. Tokenizer.__init__(self)
  178. self._template_setter = template_setter
  179. def _tokenize(self, value, index):
  180. if index == 1 and self._template_setter:
  181. self._template_setter(value)
  182. if index == 0:
  183. normalized = normalize(value)
  184. if normalized in self._keyword_settings:
  185. self._custom_tokenizer = KeywordCall(support_assign=False)
  186. elif normalized in self._import_settings:
  187. self._custom_tokenizer = ImportSetting()
  188. elif normalized not in self._other_settings:
  189. return ERROR
  190. elif self._custom_tokenizer:
  191. return self._custom_tokenizer.tokenize(value)
  192. return Tokenizer._tokenize(self, value, index)
  193. class ImportSetting(Tokenizer):
  194. _tokens = (IMPORT, ARGUMENT)
  195. class TestCaseSetting(Setting):
  196. _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
  197. 'template')
  198. _import_settings = ()
  199. _other_settings = ('documentation', 'tags', 'timeout')
  200. def _tokenize(self, value, index):
  201. if index == 0:
  202. type = Setting._tokenize(self, value[1:-1], index)
  203. return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
  204. return Setting._tokenize(self, value, index)
  205. class KeywordSetting(TestCaseSetting):
  206. _keyword_settings = ('teardown',)
  207. _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags')
  208. class Variable(Tokenizer):
  209. _tokens = (SYNTAX, ARGUMENT)
  210. def _tokenize(self, value, index):
  211. if index == 0 and not self._is_assign(value):
  212. return ERROR
  213. return Tokenizer._tokenize(self, value, index)
  214. class KeywordCall(Tokenizer):
  215. _tokens = (KEYWORD, ARGUMENT)
  216. def __init__(self, support_assign=True):
  217. Tokenizer.__init__(self)
  218. self._keyword_found = not support_assign
  219. self._assigns = 0
  220. def _tokenize(self, value, index):
  221. if not self._keyword_found and self._is_assign(value):
  222. self._assigns += 1
  223. return SYNTAX # VariableTokenizer tokenizes this later.
  224. if self._keyword_found:
  225. return Tokenizer._tokenize(self, value, index - self._assigns)
  226. self._keyword_found = True
  227. return GherkinTokenizer().tokenize(value, KEYWORD)
  228. class GherkinTokenizer:
  229. _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)
  230. def tokenize(self, value, token):
  231. match = self._gherkin_prefix.match(value)
  232. if not match:
  233. return [(value, token)]
  234. end = match.end()
  235. return [(value[:end], GHERKIN), (value[end:], token)]
  236. class TemplatedKeywordCall(Tokenizer):
  237. _tokens = (ARGUMENT,)
  238. class ForLoop(Tokenizer):
  239. def __init__(self):
  240. Tokenizer.__init__(self)
  241. self._in_arguments = False
  242. def _tokenize(self, value, index):
  243. token = self._in_arguments and ARGUMENT or SYNTAX
  244. if value.upper() in ('IN', 'IN RANGE'):
  245. self._in_arguments = True
  246. return token
  247. class _Table:
  248. _tokenizer_class = None
  249. def __init__(self, prev_tokenizer=None):
  250. self._tokenizer = self._tokenizer_class()
  251. self._prev_tokenizer = prev_tokenizer
  252. self._prev_values_on_row = []
  253. def tokenize(self, value, index):
  254. if self._continues(value, index):
  255. self._tokenizer = self._prev_tokenizer
  256. yield value, SYNTAX
  257. else:
  258. yield from self._tokenize(value, index)
  259. self._prev_values_on_row.append(value)
  260. def _continues(self, value, index):
  261. return value == '...' and all(self._is_empty(t)
  262. for t in self._prev_values_on_row)
  263. def _is_empty(self, value):
  264. return value in ('', '\\')
  265. def _tokenize(self, value, index):
  266. return self._tokenizer.tokenize(value)
  267. def end_row(self):
  268. self.__init__(prev_tokenizer=self._tokenizer)
  269. class UnknownTable(_Table):
  270. _tokenizer_class = Comment
  271. def _continues(self, value, index):
  272. return False
  273. class VariableTable(_Table):
  274. _tokenizer_class = Variable
  275. class SettingTable(_Table):
  276. _tokenizer_class = Setting
  277. def __init__(self, template_setter, prev_tokenizer=None):
  278. _Table.__init__(self, prev_tokenizer)
  279. self._template_setter = template_setter
  280. def _tokenize(self, value, index):
  281. if index == 0 and normalize(value) == 'testtemplate':
  282. self._tokenizer = Setting(self._template_setter)
  283. return _Table._tokenize(self, value, index)
  284. def end_row(self):
  285. self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)
  286. class TestCaseTable(_Table):
  287. _setting_class = TestCaseSetting
  288. _test_template = None
  289. _default_template = None
  290. @property
  291. def _tokenizer_class(self):
  292. if self._test_template or (self._default_template and
  293. self._test_template is not False):
  294. return TemplatedKeywordCall
  295. return KeywordCall
  296. def _continues(self, value, index):
  297. return index > 0 and _Table._continues(self, value, index)
  298. def _tokenize(self, value, index):
  299. if index == 0:
  300. if value:
  301. self._test_template = None
  302. return GherkinTokenizer().tokenize(value, TC_KW_NAME)
  303. if index == 1 and self._is_setting(value):
  304. if self._is_template(value):
  305. self._test_template = False
  306. self._tokenizer = self._setting_class(self.set_test_template)
  307. else:
  308. self._tokenizer = self._setting_class()
  309. if index == 1 and self._is_for_loop(value):
  310. self._tokenizer = ForLoop()
  311. if index == 1 and self._is_empty(value):
  312. return [(value, SYNTAX)]
  313. return _Table._tokenize(self, value, index)
  314. def _is_setting(self, value):
  315. return value.startswith('[') and value.endswith(']')
  316. def _is_template(self, value):
  317. return normalize(value) == '[template]'
  318. def _is_for_loop(self, value):
  319. return value.startswith(':') and normalize(value, remove=':') == 'for'
  320. def set_test_template(self, template):
  321. self._test_template = self._is_template_set(template)
  322. def set_default_template(self, template):
  323. self._default_template = self._is_template_set(template)
  324. def _is_template_set(self, template):
  325. return normalize(template) not in ('', '\\', 'none', '${empty}')
  326. class KeywordTable(TestCaseTable):
  327. _tokenizer_class = KeywordCall
  328. _setting_class = KeywordSetting
  329. def _is_template(self, value):
  330. return False
  331. # Following code copied directly from Robot Framework 2.7.5.
  332. class VariableSplitter:
  333. def __init__(self, string, identifiers):
  334. self.identifier = None
  335. self.base = None
  336. self.index = None
  337. self.start = -1
  338. self.end = -1
  339. self._identifiers = identifiers
  340. self._may_have_internal_variables = False
  341. try:
  342. self._split(string)
  343. except ValueError:
  344. pass
  345. else:
  346. self._finalize()
  347. def get_replaced_base(self, variables):
  348. if self._may_have_internal_variables:
  349. return variables.replace_string(self.base)
  350. return self.base
  351. def _finalize(self):
  352. self.identifier = self._variable_chars[0]
  353. self.base = ''.join(self._variable_chars[2:-1])
  354. self.end = self.start + len(self._variable_chars)
  355. if self._has_list_or_dict_variable_index():
  356. self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
  357. self.end += len(self._list_and_dict_variable_index_chars)
  358. def _has_list_or_dict_variable_index(self):
  359. return self._list_and_dict_variable_index_chars\
  360. and self._list_and_dict_variable_index_chars[-1] == ']'
  361. def _split(self, string):
  362. start_index, max_index = self._find_variable(string)
  363. self.start = start_index
  364. self._open_curly = 1
  365. self._state = self._variable_state
  366. self._variable_chars = [string[start_index], '{']
  367. self._list_and_dict_variable_index_chars = []
  368. self._string = string
  369. start_index += 2
  370. for index, char in enumerate(string[start_index:]):
  371. index += start_index # Giving start to enumerate only in Py 2.6+
  372. try:
  373. self._state(char, index)
  374. except StopIteration:
  375. return
  376. if index == max_index and not self._scanning_list_variable_index():
  377. return
  378. def _scanning_list_variable_index(self):
  379. return self._state in [self._waiting_list_variable_index_state,
  380. self._list_variable_index_state]
  381. def _find_variable(self, string):
  382. max_end_index = string.rfind('}')
  383. if max_end_index == -1:
  384. raise ValueError('No variable end found')
  385. if self._is_escaped(string, max_end_index):
  386. return self._find_variable(string[:max_end_index])
  387. start_index = self._find_start_index(string, 1, max_end_index)
  388. if start_index == -1:
  389. raise ValueError('No variable start found')
  390. return start_index, max_end_index
  391. def _find_start_index(self, string, start, end):
  392. index = string.find('{', start, end) - 1
  393. if index < 0:
  394. return -1
  395. if self._start_index_is_ok(string, index):
  396. return index
  397. return self._find_start_index(string, index+2, end)
  398. def _start_index_is_ok(self, string, index):
  399. return string[index] in self._identifiers\
  400. and not self._is_escaped(string, index)
  401. def _is_escaped(self, string, index):
  402. escaped = False
  403. while index > 0 and string[index-1] == '\\':
  404. index -= 1
  405. escaped = not escaped
  406. return escaped
  407. def _variable_state(self, char, index):
  408. self._variable_chars.append(char)
  409. if char == '}' and not self._is_escaped(self._string, index):
  410. self._open_curly -= 1
  411. if self._open_curly == 0:
  412. if not self._is_list_or_dict_variable():
  413. raise StopIteration
  414. self._state = self._waiting_list_variable_index_state
  415. elif char in self._identifiers:
  416. self._state = self._internal_variable_start_state
  417. def _is_list_or_dict_variable(self):
  418. return self._variable_chars[0] in ('@','&')
  419. def _internal_variable_start_state(self, char, index):
  420. self._state = self._variable_state
  421. if char == '{':
  422. self._variable_chars.append(char)
  423. self._open_curly += 1
  424. self._may_have_internal_variables = True
  425. else:
  426. self._variable_state(char, index)
  427. def _waiting_list_variable_index_state(self, char, index):
  428. if char != '[':
  429. raise StopIteration
  430. self._list_and_dict_variable_index_chars.append(char)
  431. self._state = self._list_variable_index_state
  432. def _list_variable_index_state(self, char, index):
  433. self._list_and_dict_variable_index_chars.append(char)
  434. if char == ']':
  435. raise StopIteration