quality_unicode.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import re
  2. import fnmatch
  3. message_unicode_B = \
  4. "File contains a unicode character : %s, line %s. " \
  5. "But not in the whitelist. " \
  6. "Add the file to the whitelist in " + __file__
  7. message_unicode_D = \
  8. "File does not contain a unicode character : %s." \
  9. "but is in the whitelist. " \
  10. "Remove the file from the whitelist in " + __file__
  11. encoding_header_re = re.compile(
  12. r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
  13. # Whitelist pattern for files which can have unicode.
  14. unicode_whitelist = [
  15. # Author names can include non-ASCII characters
  16. r'*/bin/authors_update.py',
  17. r'*/bin/mailmap_check.py',
  18. # These files have functions and test functions for unicode input and
  19. # output.
  20. r'*/sympy/testing/tests/test_code_quality.py',
  21. r'*/sympy/physics/vector/tests/test_printing.py',
  22. r'*/physics/quantum/tests/test_printing.py',
  23. r'*/sympy/vector/tests/test_printing.py',
  24. r'*/sympy/parsing/tests/test_sympy_parser.py',
  25. r'*/sympy/printing/pretty/tests/test_pretty.py',
  26. r'*/sympy/printing/tests/test_conventions.py',
  27. r'*/sympy/printing/tests/test_preview.py',
  28. r'*/liealgebras/type_g.py',
  29. r'*/liealgebras/weyl_group.py',
  30. r'*/liealgebras/tests/test_type_G.py',
  31. # wigner.py and polarization.py have unicode doctests. These probably
  32. # don't need to be there but some of the examples that are there are
  33. # pretty ugly without use_unicode (matrices need to be wrapped across
  34. # multiple lines etc)
  35. r'*/sympy/physics/wigner.py',
  36. r'*/sympy/physics/optics/polarization.py',
  37. # joint.py uses some unicode for variable names in the docstrings
  38. r'*/sympy/physics/mechanics/joint.py',
  39. ]
  40. unicode_strict_whitelist = [
  41. r'*/sympy/parsing/latex/_antlr/__init__.py',
  42. ]
  43. def _test_this_file_encoding(
  44. fname, test_file,
  45. unicode_whitelist=unicode_whitelist,
  46. unicode_strict_whitelist=unicode_strict_whitelist):
  47. """Test helper function for unicode test
  48. The test may have to operate on filewise manner, so it had moved
  49. to a separate process.
  50. """
  51. has_unicode = False
  52. is_in_whitelist = False
  53. is_in_strict_whitelist = False
  54. for patt in unicode_whitelist:
  55. if fnmatch.fnmatch(fname, patt):
  56. is_in_whitelist = True
  57. break
  58. for patt in unicode_strict_whitelist:
  59. if fnmatch.fnmatch(fname, patt):
  60. is_in_strict_whitelist = True
  61. is_in_whitelist = True
  62. break
  63. if is_in_whitelist:
  64. for idx, line in enumerate(test_file):
  65. try:
  66. line.encode(encoding='ascii')
  67. except (UnicodeEncodeError, UnicodeDecodeError):
  68. has_unicode = True
  69. if not has_unicode and not is_in_strict_whitelist:
  70. assert False, message_unicode_D % fname
  71. else:
  72. for idx, line in enumerate(test_file):
  73. try:
  74. line.encode(encoding='ascii')
  75. except (UnicodeEncodeError, UnicodeDecodeError):
  76. assert False, message_unicode_B % (fname, idx + 1)