localization.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. """
  2. Helpers for configuring locale settings.
  3. Name `localization` is chosen to avoid overlap with builtin `locale` module.
  4. """
  5. from contextlib import contextmanager
  6. import locale
  7. import re
  8. import subprocess
  9. from pandas._config.config import options
  10. @contextmanager
  11. def set_locale(new_locale, lc_var: int = locale.LC_ALL):
  12. """
  13. Context manager for temporarily setting a locale.
  14. Parameters
  15. ----------
  16. new_locale : str or tuple
  17. A string of the form <language_country>.<encoding>. For example to set
  18. the current locale to US English with a UTF8 encoding, you would pass
  19. "en_US.UTF-8".
  20. lc_var : int, default `locale.LC_ALL`
  21. The category of the locale being set.
  22. Notes
  23. -----
  24. This is useful when you want to run a particular block of code under a
  25. particular locale, without globally setting the locale. This probably isn't
  26. thread-safe.
  27. """
  28. current_locale = locale.getlocale()
  29. try:
  30. locale.setlocale(lc_var, new_locale)
  31. normalized_locale = locale.getlocale()
  32. if all(x is not None for x in normalized_locale):
  33. yield ".".join(normalized_locale)
  34. else:
  35. yield new_locale
  36. finally:
  37. locale.setlocale(lc_var, current_locale)
  38. def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
  39. """
  40. Check to see if we can set a locale, and subsequently get the locale,
  41. without raising an Exception.
  42. Parameters
  43. ----------
  44. lc : str
  45. The locale to attempt to set.
  46. lc_var : int, default `locale.LC_ALL`
  47. The category of the locale being set.
  48. Returns
  49. -------
  50. bool
  51. Whether the passed locale can be set
  52. """
  53. try:
  54. with set_locale(lc, lc_var=lc_var):
  55. pass
  56. except (ValueError, locale.Error):
  57. # horrible name for a Exception subclass
  58. return False
  59. else:
  60. return True
  61. def _valid_locales(locales, normalize):
  62. """
  63. Return a list of normalized locales that do not throw an ``Exception``
  64. when set.
  65. Parameters
  66. ----------
  67. locales : str
  68. A string where each locale is separated by a newline.
  69. normalize : bool
  70. Whether to call ``locale.normalize`` on each locale.
  71. Returns
  72. -------
  73. valid_locales : list
  74. A list of valid locales.
  75. """
  76. return [
  77. loc
  78. for loc in (
  79. locale.normalize(loc.strip()) if normalize else loc.strip()
  80. for loc in locales
  81. )
  82. if can_set_locale(loc)
  83. ]
  84. def _default_locale_getter():
  85. return subprocess.check_output(["locale -a"], shell=True)
  86. def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter):
  87. """
  88. Get all the locales that are available on the system.
  89. Parameters
  90. ----------
  91. prefix : str
  92. If not ``None`` then return only those locales with the prefix
  93. provided. For example to get all English language locales (those that
  94. start with ``"en"``), pass ``prefix="en"``.
  95. normalize : bool
  96. Call ``locale.normalize`` on the resulting list of available locales.
  97. If ``True``, only locales that can be set without throwing an
  98. ``Exception`` are returned.
  99. locale_getter : callable
  100. The function to use to retrieve the current locales. This should return
  101. a string with each locale separated by a newline character.
  102. Returns
  103. -------
  104. locales : list of strings
  105. A list of locale strings that can be set with ``locale.setlocale()``.
  106. For example::
  107. locale.setlocale(locale.LC_ALL, locale_string)
  108. On error will return None (no locale available, e.g. Windows)
  109. """
  110. try:
  111. raw_locales = locale_getter()
  112. except subprocess.CalledProcessError:
  113. # Raised on (some? all?) Windows platforms because Note: "locale -a"
  114. # is not defined
  115. return None
  116. try:
  117. # raw_locales is "\n" separated list of locales
  118. # it may contain non-decodable parts, so split
  119. # extract what we can and then rejoin.
  120. raw_locales = raw_locales.split(b"\n")
  121. out_locales = []
  122. for x in raw_locales:
  123. try:
  124. out_locales.append(str(x, encoding=options.display.encoding))
  125. except UnicodeError:
  126. # 'locale -a' is used to populated 'raw_locales' and on
  127. # Redhat 7 Linux (and maybe others) prints locale names
  128. # using windows-1252 encoding. Bug only triggered by
  129. # a few special characters and when there is an
  130. # extensive list of installed locales.
  131. out_locales.append(str(x, encoding="windows-1252"))
  132. except TypeError:
  133. pass
  134. if prefix is None:
  135. return _valid_locales(out_locales, normalize)
  136. pattern = re.compile(f"{prefix}.*")
  137. found = pattern.findall("\n".join(out_locales))
  138. return _valid_locales(found, normalize)