  // LaTeX.g4
  /*
   ANTLR4 LaTeX Math Grammar

   Ported from latex2sympy by @augustt198 (https://github.com/augustt198/latex2sympy).
   See the license in LICENSE.txt.
   */

  /*
   After changing this file, you must run `python setup.py antlr` in the root directory of
   the repository. This regenerates the code in `sympy/parsing/latex/_antlr/*.py`.
   */
  // Combined lexer/parser grammar named LaTeX.
  grammar LaTeX;

  // Code-generation target for the ANTLR tool.
  options {
      language = Python2;
  }
  // ---- Tokens that are matched and then discarded (-> skip) ----

  // Runs of blanks, tabs, carriage returns and newlines.
  WS: [ \t\r\n]+ -> skip;

  // LaTeX spacing commands, in short and long spellings. The parentheses are
  // required: a lexer command binds to a single alternative, so the group makes
  // `-> skip` apply to both spellings.
  THINSPACE:     ('\\,' | '\\thinspace')    -> skip;
  MEDSPACE:      ('\\:' | '\\medspace')     -> skip;
  THICKSPACE:    ('\\;' | '\\thickspace')   -> skip;
  QUAD:          '\\quad'                   -> skip;
  QQUAD:         '\\qquad'                  -> skip;
  NEGTHINSPACE:  ('\\!' | '\\negthinspace') -> skip;
  NEGMEDSPACE:   '\\negmedspace'            -> skip;
  NEGTHICKSPACE: '\\negthickspace'          -> skip;

  // A bare \left or \right is dropped; the delimiter that follows is lexed on its own.
  CMD_LEFT:  '\\left'  -> skip;
  CMD_RIGHT: '\\right' -> skip;
  // Further control sequences that are matched and skipped. All alternatives sit
  // inside one parenthesised group so that `-> skip` covers every one of them.
  IGNORE:
      (
          '\\vrule'
        | '\\vcenter'
        | '\\vbox'
        | '\\vskip'
        | '\\vspace'
        | '\\hfil'
        | '\\*'
        | '\\-'
        | '\\.'
        | '\\/'
        | '\\"'
        | '\\('
        | '\\='
      ) -> skip;
  // ---- Arithmetic operator characters ----
  ADD: '+';
  SUB: '-';
  MUL: '*';
  DIV: '/';

  // ---- Grouping delimiters ----
  L_PAREN: '(';
  R_PAREN: ')';
  L_BRACE: '{';
  R_BRACE: '}';
  // Escaped braces, i.e. \{ and \} in the LaTeX input.
  L_BRACE_LITERAL: '\\{';
  R_BRACE_LITERAL: '\\}';
  L_BRACKET: '[';
  R_BRACKET: ']';

  // ---- Vertical bars and angle brackets ----
  BAR: '|';
  // `\right|` and `\left|` are longer than the skipped `\right` / `\left`
  // tokens, so the lexer's longest-match rule yields these single tokens.
  R_BAR: '\\right|';
  L_BAR: '\\left|';

  L_ANGLE: '\\langle';
  R_ANGLE: '\\rangle';
  // The \lim command.
  FUNC_LIM: '\\lim';

  // Arrow commands accepted between the variable and the target value in the
  // subscript of a limit (see the limit_sub parser rule).
  LIM_APPROACH_SYM:
      '\\to'
    | '\\rightarrow'
    | '\\Rightarrow'
    | '\\longrightarrow'
    | '\\Longrightarrow';
  // ---- Big operators ----
  FUNC_INT:  '\\int';
  FUNC_SUM:  '\\sum';
  FUNC_PROD: '\\prod';

  // ---- Exponential and logarithms ----
  FUNC_EXP: '\\exp';
  FUNC_LOG: '\\log';
  FUNC_LN:  '\\ln';

  // ---- Trigonometric functions ----
  FUNC_SIN: '\\sin';
  FUNC_COS: '\\cos';
  FUNC_TAN: '\\tan';
  FUNC_CSC: '\\csc';
  FUNC_SEC: '\\sec';
  FUNC_COT: '\\cot';

  // ---- Inverse trigonometric functions ----
  FUNC_ARCSIN: '\\arcsin';
  FUNC_ARCCOS: '\\arccos';
  FUNC_ARCTAN: '\\arctan';
  FUNC_ARCCSC: '\\arccsc';
  FUNC_ARCSEC: '\\arcsec';
  FUNC_ARCCOT: '\\arccot';

  // ---- Hyperbolic functions and their inverses ----
  FUNC_SINH:   '\\sinh';
  FUNC_COSH:   '\\cosh';
  FUNC_TANH:   '\\tanh';
  FUNC_ARSINH: '\\arsinh';
  FUNC_ARCOSH: '\\arcosh';
  FUNC_ARTANH: '\\artanh';

  // ---- Floor / ceiling delimiters ----
  L_FLOOR: '\\lfloor';
  R_FLOOR: '\\rfloor';
  L_CEIL:  '\\lceil';
  R_CEIL:  '\\rceil';

  // ---- Commands taking a braced argument (see the func parser rule) ----
  FUNC_SQRT:     '\\sqrt';
  FUNC_OVERLINE: '\\overline';
  // ---- Operator commands (used alongside MUL / DIV in the mp rules) ----
  CMD_TIMES: '\\times';
  CMD_CDOT:  '\\cdot';
  CMD_DIV:   '\\div';

  // ---- Fraction and binomial commands ----
  CMD_FRAC:   '\\frac';
  CMD_BINOM:  '\\binom';
  CMD_DBINOM: '\\dbinom';
  CMD_TBINOM: '\\tbinom';

  // ---- \mathit{...} ----
  CMD_MATHIT: '\\mathit';

  // ---- Subscript / superscript markers and the colon ----
  UNDERSCORE: '_';
  CARET:      '^';
  COLON:      ':';
  // One whitespace character; a fragment, so it never becomes a token itself.
  fragment WS_CHAR: [ \t\r\n];

  // The letter 'd', optional whitespace (non-greedy), then either one ASCII
  // letter or a backslash command name, e.g. `dx`, `d x`, `d\theta`.
  DIFFERENTIAL: 'd' WS_CHAR*? ([a-zA-Z] | '\\' [a-zA-Z]+);

  // A single ASCII letter.
  LETTER: [a-zA-Z];

  // One decimal digit; fragment used only by NUMBER.
  fragment DIGIT: [0-9];

  // Either an integer with optional ",ddd" groups, or a decimal whose integer
  // part (digits and ",ddd" groups) may be empty but which must have at least
  // one digit after the '.'.
  NUMBER:
      DIGIT+ (',' DIGIT DIGIT DIGIT)*
    | DIGIT* (',' DIGIT DIGIT DIGIT)* '.' DIGIT+;
  // '=' optionally preceded or followed by '&', with optional whitespace
  // between the two characters.
  EQUAL: ('&' WS_CHAR*?)? '=' | '=' (WS_CHAR*? '&')?;
  NEQ: '\\neq';

  // Less-than family. LTE also accepts the LTE_Q and LTE_S spellings; since LTE
  // is declared first and matches the same text, ANTLR's first-rule tie-break
  // reports those spellings as LTE.
  LT: '<';
  LTE: '\\leq' | '\\le' | LTE_Q | LTE_S;
  LTE_Q: '\\leqq';
  LTE_S: '\\leqslant';

  // Greater-than family, structured the same way as the less-than family.
  GT: '>';
  GTE: '\\geq' | '\\ge' | GTE_Q | GTE_S;
  GTE_Q: '\\geqq';
  GTE_S: '\\geqslant';

  // Exclamation mark (used as a postfix operator by postfix_op).
  BANG: '!';

  // Any backslash command made of ASCII letters. Declared after the specific
  // command tokens, so on an equal-length match the specific token wins.
  SYMBOL: '\\' [a-zA-Z]+;
  // Top-level rule of this grammar (no other rule here refers to it).
  math: relation;

  // Two relations joined by a comparison token, or a plain expression.
  // The rule is directly left-recursive.
  relation:
      relation (EQUAL | LT | LTE | GT | GTE | NEQ) relation
    | expr;

  // Exactly `expr = expr`; used inside sub/superscripts (eval_at_*, subeq, supeq).
  equality: expr EQUAL expr;
  // An expression is an additive chain.
  expr: additive;

  // Sums and differences over multiplicative parts.
  additive:
      additive (ADD | SUB) additive
    | mp;

  // Multiplicative part: products, quotients and ratios over unary terms.
  mp:
      mp (MUL | CMD_TIMES | CMD_CDOT | DIV | CMD_DIV | COLON) mp
    | unary;

  // Same shape as mp, but bottoms out in unary_nofunc. Used for a function
  // argument written without parentheses (func_arg_noparens).
  mp_nofunc:
      mp_nofunc (MUL | CMD_TIMES | CMD_CDOT | DIV | CMD_DIV | COLON) mp_nofunc
    | unary_nofunc;
  // A signed unary, or one or more adjacent postfix terms.
  unary:
      (ADD | SUB) unary
    | postfix+;

  // As unary, except that every term after the first must be a postfix_nofunc.
  unary_nofunc:
      (ADD | SUB) unary_nofunc
    | postfix postfix_nofunc*;

  // A power expression followed by any number of postfix operators.
  postfix:        exp postfix_op*;
  postfix_nofunc: exp_nofunc postfix_op*;

  // Postfix operators: `!` or an "evaluated at" bar.
  postfix_op: BANG | eval_at;

  // A bar followed by a superscript, a subscript, or a superscript then a subscript.
  eval_at:
      BAR (eval_at_sup | eval_at_sub | eval_at_sup eval_at_sub);

  // Braced subscript / superscript holding an expression or an equality.
  eval_at_sub: UNDERSCORE L_BRACE (expr | equality) R_BRACE;
  eval_at_sup: CARET L_BRACE (expr | equality) R_BRACE;
  // Exponentiation: a base raised to an atom or a braced expression, optionally
  // followed by a subscript; otherwise a single component.
  exp:
      exp CARET (atom | L_BRACE expr R_BRACE) subexpr?
    | comp;

  // The same, built over comp_nofunc instead of comp.
  exp_nofunc:
      exp_nofunc CARET (atom | L_BRACE expr R_BRACE) subexpr?
    | comp_nofunc;
  // Component: the operand kinds that can appear as the base of a power.
  comp:
      group
    | abs_group
    | func
    | atom
    | frac
    | binom
    | floor
    | ceil;

  // Identical to comp with the `func` alternative removed.
  comp_nofunc:
      group
    | abs_group
    | atom
    | frac
    | binom
    | floor
    | ceil;
  // An expression wrapped in matching (), [], {} or \{ \}.
  group:
      L_PAREN expr R_PAREN
    | L_BRACKET expr R_BRACKET
    | L_BRACE expr R_BRACE
    | L_BRACE_LITERAL expr R_BRACE_LITERAL;

  // An expression between two plain bars: | expr |.
  abs_group: BAR expr BAR;
  // Atom: a letter or symbol (optionally subscripted), a number, a
  // differential, a \mathit group, or a bra / ket.
  atom:
      (LETTER | SYMBOL) subexpr?
    | NUMBER
    | DIFFERENTIAL
    | mathit
    | bra
    | ket;

  // \langle expr followed by `\right|` or `|`.
  bra: L_ANGLE expr (R_BAR | BAR);
  // `\left|` or `|`, then expr \rangle.
  ket: (L_BAR | BAR) expr R_ANGLE;

  // \mathit{ letters }.
  mathit: CMD_MATHIT L_BRACE mathit_text R_BRACE;
  // Zero or more LETTER tokens.
  mathit_text: LETTER*;
  // \frac{upper}{lower}; the labels name the two braced expressions.
  frac:
      CMD_FRAC
      L_BRACE upper=expr R_BRACE
      L_BRACE lower=expr R_BRACE;

  // \binom{n}{k}, \dbinom{n}{k} or \tbinom{n}{k}.
  binom:
      (CMD_BINOM | CMD_DBINOM | CMD_TBINOM)
      L_BRACE n=expr R_BRACE
      L_BRACE k=expr R_BRACE;

  // \lfloor val \rfloor and \lceil val \rceil.
  floor: L_FLOOR val=expr R_FLOOR;
  ceil:  L_CEIL val=expr R_CEIL;
  // The named-function tokens that share the first alternative of `func`:
  // optional sub/superscripts followed by an argument.
  func_normal:
      FUNC_EXP
    | FUNC_LOG
    | FUNC_LN
    | FUNC_SIN
    | FUNC_COS
    | FUNC_TAN
    | FUNC_CSC
    | FUNC_SEC
    | FUNC_COT
    | FUNC_ARCSIN
    | FUNC_ARCCOS
    | FUNC_ARCTAN
    | FUNC_ARCCSC
    | FUNC_ARCSEC
    | FUNC_ARCCOT
    | FUNC_SINH
    | FUNC_COSH
    | FUNC_TANH
    | FUNC_ARSINH
    | FUNC_ARCOSH
    | FUNC_ARTANH;
  // Function-like constructs, one alternative per syntactic form.
  func:
      // Named function with optional sub/superscript in either order, then a
      // parenthesised argument list or a bare argument without parentheses.
      func_normal (subexpr? supexpr? | supexpr? subexpr?)
      (L_PAREN func_arg R_PAREN | func_arg_noparens)

      // Letter or symbol applied to parenthesised arguments, e.g. f(x).
    | (LETTER | SYMBOL) subexpr? L_PAREN args R_PAREN

      // \int with an optional sub/superscript pair (both present, either
      // order), followed by: an optional additive then a DIFFERENTIAL, or a
      // frac, or an additive alone.
    | FUNC_INT (subexpr supexpr | supexpr subexpr)?
      (additive? DIFFERENTIAL | frac | additive)

      // \sqrt with an optional bracketed root and a braced base.
    | FUNC_SQRT (L_BRACKET root=expr R_BRACKET)? L_BRACE base=expr R_BRACE

      // \overline with a braced base.
    | FUNC_OVERLINE L_BRACE base=expr R_BRACE

      // \sum or \prod: an equality subscript and a superscript (either order),
      // then the multiplicative part they apply to.
    | (FUNC_SUM | FUNC_PROD) (subeq supexpr | supexpr subeq) mp

      // \lim with its subscript, then the multiplicative part it applies to.
    | FUNC_LIM limit_sub mp;
  // Comma-separated expressions (right-recursive), at least one.
  args: expr ',' args | expr;

  // Subscript of \lim: _{ variable arrow expr }, where the expr may be followed
  // by ^{+} or ^{-} before the closing brace.
  limit_sub:
      UNDERSCORE L_BRACE
          (LETTER | SYMBOL) LIM_APPROACH_SYM expr
          (CARET L_BRACE (ADD | SUB) R_BRACE)?
      R_BRACE;

  // Argument list of a func_normal call written with parentheses.
  func_arg: expr | expr ',' func_arg;

  // Argument of a func_normal call written without parentheses.
  func_arg_noparens: mp_nofunc;
  // Subscript: `_` followed by an atom or a braced expression.
  subexpr: UNDERSCORE (atom | L_BRACE expr R_BRACE);
  // Superscript: `^` followed by an atom or a braced expression.
  supexpr: CARET (atom | L_BRACE expr R_BRACE);

  // Subscript whose braced content is an equality: _{ expr = expr }.
  subeq: UNDERSCORE L_BRACE equality R_BRACE;
  // Superscript whose braced content is an equality: ^{ expr = expr }.
  // This rule must open with CARET; with UNDERSCORE it would be an exact
  // duplicate of subeq and could never match a superscript.
  supeq: CARET L_BRACE equality R_BRACE;