locale.py 76 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755
  1. """Locale support module.
  2. The module provides low-level access to the C lib's locale APIs and adds high
  3. level number formatting APIs as well as a locale aliasing engine to complement
  4. these.
  5. The aliasing engine includes support for many commonly used locale names and
  6. maps them to values suitable for passing to the C lib's setlocale() function. It
  7. also includes default encodings for all supported locale names.
  8. """
  9. import sys
  10. import encodings
  11. import encodings.aliases
  12. import re
  13. import _collections_abc
  14. from builtins import str as _builtin_str
  15. import functools
# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.

# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.

# Names exported by ``from locale import *``.  LC_MESSAGES is deliberately
# absent from this literal for the reason given above.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
  26. def _strcoll(a,b):
  27. """ strcoll(string,string) -> int.
  28. Compares two strings according to the locale.
  29. """
  30. return (a > b) - (a < b)
  31. def _strxfrm(s):
  32. """ strxfrm(string) -> string.
  33. Returns a string that behaves for cmp locale-aware.
  34. """
  35. return s
try:

    # Pull every public name of the C extension into this namespace.
    from _locale import *

except ImportError:

    # Locale emulation: used only when the _locale extension is missing.
    # Everything below describes a fixed 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.

            The emulation always returns a new dict holding the same fixed
            values; 127 (the emulated CHAR_MAX) is used for every numeric
            field.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' and 'C' as value and always
            returns 'C'; any other value raises Error.  The category
            argument is not inspected.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
# These may or may not exist in _locale, so be sure to set them.
if 'strxfrm' not in globals():
    strxfrm = _strxfrm
if 'strcoll' not in globals():
    strcoll = _strcoll


# Keep a handle on the low-level localeconv() (from _locale or from the
# emulation above) before the public name is rebound to a wrapper.
_localeconv = localeconv

# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}
  88. @functools.wraps(_localeconv)
  89. def localeconv():
  90. d = _localeconv()
  91. if _override_localeconv:
  92. d.update(_override_localeconv)
  93. return d
  94. ### Number formatting APIs
  95. # Author: Martin von Loewis
  96. # improved by Georg Brandl
  97. # Iterate over grouping intervals
  98. def _grouping_intervals(grouping):
  99. last_interval = None
  100. for interval in grouping:
  101. # if grouping is -1, we are done
  102. if interval == CHAR_MAX:
  103. return
  104. # 0: re-use last group ad infinitum
  105. if interval == 0:
  106. if last_interval is None:
  107. raise ValueError("invalid grouping")
  108. while True:
  109. yield last_interval
  110. yield interval
  111. last_interval = interval
  112. #perform the grouping from right to left
  113. def _group(s, monetary=False):
  114. conv = localeconv()
  115. thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
  116. grouping = conv[monetary and 'mon_grouping' or 'grouping']
  117. if not grouping:
  118. return (s, 0)
  119. if s[-1] == ' ':
  120. stripped = s.rstrip()
  121. right_spaces = s[len(stripped):]
  122. s = stripped
  123. else:
  124. right_spaces = ''
  125. left_spaces = ''
  126. groups = []
  127. for interval in _grouping_intervals(grouping):
  128. if not s or s[-1] not in "0123456789":
  129. # only non-digit characters remain (sign, spaces)
  130. left_spaces = s
  131. s = ''
  132. break
  133. groups.append(s[-interval:])
  134. s = s[:-interval]
  135. if s:
  136. groups.append(s)
  137. groups.reverse()
  138. return (
  139. left_spaces + thousands_sep.join(groups) + right_spaces,
  140. len(thousands_sep) * (len(groups) - 1)
  141. )
  142. # Strip a given amount of excess padding from the given string
  143. def _strip_padding(s, amount):
  144. lpos = 0
  145. while amount and s[lpos] == ' ':
  146. lpos += 1
  147. amount -= 1
  148. rpos = len(s) - 1
  149. while amount and s[rpos] == ' ':
  150. rpos -= 1
  151. amount -= 1
  152. return s[lpos:rpos+1]
# Matches one %-format specifier: an optional "(key)" mapping key (group
# 'key'), any run of flag/width/precision/length characters (group
# 'modifiers'), and a single conversion character.  "%%" matches as well.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
  155. def _format(percent, value, grouping=False, monetary=False, *additional):
  156. if additional:
  157. formatted = percent % ((value,) + additional)
  158. else:
  159. formatted = percent % value
  160. # floats and decimal ints need special action!
  161. if percent[-1] in 'eEfFgG':
  162. seps = 0
  163. parts = formatted.split('.')
  164. if grouping:
  165. parts[0], seps = _group(parts[0], monetary=monetary)
  166. decimal_point = localeconv()[monetary and 'mon_decimal_point'
  167. or 'decimal_point']
  168. formatted = decimal_point.join(parts)
  169. if seps:
  170. formatted = _strip_padding(formatted, seps)
  171. elif percent[-1] in 'diu':
  172. seps = 0
  173. if grouping:
  174. formatted, seps = _group(formatted, monetary=monetary)
  175. if seps:
  176. formatted = _strip_padding(formatted, seps)
  177. return formatted
  178. def format_string(f, val, grouping=False, monetary=False):
  179. """Formats a string in the same way that the % formatting would use,
  180. but takes the current locale into account.
  181. Grouping is applied if the third parameter is true.
  182. Conversion uses monetary thousands separator and grouping strings if
  183. forth parameter monetary is true."""
  184. percents = list(_percent_re.finditer(f))
  185. new_f = _percent_re.sub('%s', f)
  186. if isinstance(val, _collections_abc.Mapping):
  187. new_val = []
  188. for perc in percents:
  189. if perc.group()[-1]=='%':
  190. new_val.append('%')
  191. else:
  192. new_val.append(_format(perc.group(), val, grouping, monetary))
  193. else:
  194. if not isinstance(val, tuple):
  195. val = (val,)
  196. new_val = []
  197. i = 0
  198. for perc in percents:
  199. if perc.group()[-1]=='%':
  200. new_val.append('%')
  201. else:
  202. starcount = perc.group('modifiers').count('*')
  203. new_val.append(_format(perc.group(),
  204. val[i],
  205. grouping,
  206. monetary,
  207. *val[i+1:i+1+starcount]))
  208. i += (1 + starcount)
  209. val = tuple(new_val)
  210. return new_f % val
  211. def format(percent, value, grouping=False, monetary=False, *additional):
  212. """Deprecated, use format_string instead."""
  213. import warnings
  214. warnings.warn(
  215. "This method will be removed in a future version of Python. "
  216. "Use 'locale.format_string()' instead.",
  217. DeprecationWarning, stacklevel=2
  218. )
  219. match = _percent_re.match(percent)
  220. if not match or len(match.group())!= len(percent):
  221. raise ValueError(("format() must be given exactly one %%char "
  222. "format specifier, %s not valid") % repr(percent))
  223. return _format(percent, value, grouping, monetary, *additional)
  224. def currency(val, symbol=True, grouping=False, international=False):
  225. """Formats val according to the currency settings
  226. in the current locale."""
  227. conv = localeconv()
  228. # check for illegal values
  229. digits = conv[international and 'int_frac_digits' or 'frac_digits']
  230. if digits == 127:
  231. raise ValueError("Currency formatting is not possible using "
  232. "the 'C' locale.")
  233. s = _format('%%.%if' % digits, abs(val), grouping, monetary=True)
  234. # '<' and '>' are markers if the sign must be inserted between symbol and value
  235. s = '<' + s + '>'
  236. if symbol:
  237. smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
  238. precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
  239. separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
  240. if precedes:
  241. s = smb + (separated and ' ' or '') + s
  242. else:
  243. if international and smb[-1] == ' ':
  244. smb = smb[:-1]
  245. s = s + (separated and ' ' or '') + smb
  246. sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
  247. sign = conv[val<0 and 'negative_sign' or 'positive_sign']
  248. if sign_pos == 0:
  249. s = '(' + s + ')'
  250. elif sign_pos == 1:
  251. s = sign + s
  252. elif sign_pos == 2:
  253. s = s + sign
  254. elif sign_pos == 3:
  255. s = s.replace('<', sign)
  256. elif sign_pos == 4:
  257. s = s.replace('>', sign)
  258. else:
  259. # the default if nothing specified;
  260. # this should be the most fitting sign position
  261. s = sign + s
  262. return s.replace('<', '').replace('>', '')
  263. def str(val):
  264. """Convert float to string, taking the locale into account."""
  265. return _format("%.12g", val)
  266. def delocalize(string):
  267. "Parses a string as a normalized number according to the locale settings."
  268. conv = localeconv()
  269. #First, get rid of the grouping
  270. ts = conv['thousands_sep']
  271. if ts:
  272. string = string.replace(ts, '')
  273. #next, replace the decimal point with a dot
  274. dd = conv['decimal_point']
  275. if dd:
  276. string = string.replace(dd, '.')
  277. return string
  278. def atof(string, func=float):
  279. "Parses a string as a float according to the locale settings."
  280. return func(delocalize(string))
  281. def atoi(string):
  282. "Converts a string to an integer according to the locale settings."
  283. return int(delocalize(string))
  284. def _test():
  285. setlocale(LC_ALL, "")
  286. #do grouping
  287. s1 = format_string("%d", 123456789,1)
  288. print(s1, "is", atoi(s1))
  289. #standard formatting
  290. s1 = str(3.14)
  291. print(s1, "is", atof(s1))
  292. ### Locale name aliasing engine
  293. # Author: Marc-Andre Lemburg, mal@lemburg.com
  294. # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
# store away the low-level version of setlocale (it's
# overridden below); the functions that need the raw implementation call it
# through this name.
_setlocale = setlocale
  298. def _replace_encoding(code, encoding):
  299. if '.' in code:
  300. langname = code[:code.index('.')]
  301. else:
  302. langname = code
  303. # Convert the encoding to a C lib compatible encoding string
  304. norm_encoding = encodings.normalize_encoding(encoding)
  305. #print('norm encoding: %r' % norm_encoding)
  306. norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
  307. norm_encoding)
  308. #print('aliased encoding: %r' % norm_encoding)
  309. encoding = norm_encoding
  310. norm_encoding = norm_encoding.lower()
  311. if norm_encoding in locale_encoding_alias:
  312. encoding = locale_encoding_alias[norm_encoding]
  313. else:
  314. norm_encoding = norm_encoding.replace('_', '')
  315. norm_encoding = norm_encoding.replace('-', '')
  316. if norm_encoding in locale_encoding_alias:
  317. encoding = locale_encoding_alias[norm_encoding]
  318. #print('found encoding %r' % encoding)
  319. return langname + '.' + encoding
  320. def _append_modifier(code, modifier):
  321. if modifier == 'euro':
  322. if '.' not in code:
  323. return code + '.ISO8859-15'
  324. _, _, encoding = code.partition('.')
  325. if encoding in ('ISO8859-15', 'UTF-8'):
  326. return code
  327. if encoding == 'ISO8859-1':
  328. return _replace_encoding(code, 'ISO8859-15')
  329. return code + '@' + modifier
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

        The name is lower-cased and split into language, encoding and
        modifier; locale_alias is then consulted up to four times with
        progressively less specific keys.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # Anything after a second '.' is ignored.
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # The alias table stores encodings without '-' and '_'.
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    #print('first lookup failed')

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            #print('lookup without modifier succeeded')
            if '@' not in code:
                return _append_modifier(code, modifier)
            # The alias already carries a modifier: accept it only when it
            # is the one that was asked for.
            if code.split('@', 1)[1].lower() == modifier:
                return code
    #print('second lookup failed')

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            #print('lookup without encoding succeeded')
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # Swap the encoding in front of the alias's own modifier.
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                #print('lookup without modifier and encoding succeeded')
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # Nothing matched: hand the caller's name back untouched.
    return localename
  402. def _parse_localename(localename):
  403. """ Parses the locale code for localename and returns the
  404. result as tuple (language code, encoding).
  405. The localename is normalized and passed through the locale
  406. alias engine. A ValueError is raised in case the locale name
  407. cannot be parsed.
  408. The language code corresponds to RFC 1766. code and encoding
  409. can be None in case the values cannot be determined or are
  410. unknown to this implementation.
  411. """
  412. code = normalize(localename)
  413. if '@' in code:
  414. # Deal with locale modifiers
  415. code, modifier = code.split('@', 1)
  416. if modifier == 'euro' and '.' not in code:
  417. # Assume Latin-9 for @euro locales. This is bogus,
  418. # since some systems may use other encodings for these
  419. # locales. Also, we ignore other modifiers.
  420. return code, 'iso-8859-15'
  421. if '.' in code:
  422. return tuple(code.split('.')[:2])
  423. elif code == 'C':
  424. return None, None
  425. elif code == 'UTF-8':
  426. # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
  427. # for getting UTF-8 handling for text.
  428. return None, 'UTF-8'
  429. raise ValueError('unknown locale: %s' % localename)
  430. def _build_localename(localetuple):
  431. """ Builds a locale code from the given tuple (language code,
  432. encoding).
  433. No aliasing or normalizing takes place.
  434. """
  435. try:
  436. language, encoding = localetuple
  437. if language is None:
  438. language = 'C'
  439. if encoding is None:
  440. return language
  441. else:
  442. return language + '.' + encoding
  443. except (TypeError, ValueError):
  444. raise TypeError('Locale must be None, a string, or an iterable of '
  445. 'two strings -- language code, encoding.') from None
  446. def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  447. """ Tries to determine the default locale settings and returns
  448. them as tuple (language code, encoding).
  449. According to POSIX, a program which has not called
  450. setlocale(LC_ALL, "") runs using the portable 'C' locale.
  451. Calling setlocale(LC_ALL, "") lets it use the default locale as
  452. defined by the LANG variable. Since we don't want to interfere
  453. with the current locale setting we thus emulate the behavior
  454. in the way described above.
  455. To maintain compatibility with other platforms, not only the
  456. LANG variable is tested, but a list of variables given as
  457. envvars parameter. The first found to be defined will be
  458. used. envvars defaults to the search path used in GNU gettext;
  459. it must always contain the variable name 'LANG'.
  460. Except for the code 'C', the language code corresponds to RFC
  461. 1766. code and encoding can be None in case the values cannot
  462. be determined.
  463. """
  464. try:
  465. # check if it's supported by the _locale module
  466. import _locale
  467. code, encoding = _locale._getdefaultlocale()
  468. except (ImportError, AttributeError):
  469. pass
  470. else:
  471. # make sure the code/encoding values are valid
  472. if sys.platform == "win32" and code and code[:2] == "0x":
  473. # map windows language identifier to language name
  474. code = windows_locale.get(int(code, 0))
  475. # ...add other platform-specific processing here, if
  476. # necessary...
  477. return code, encoding
  478. # fall back on POSIX behaviour
  479. import os
  480. lookup = os.environ.get
  481. for variable in envvars:
  482. localename = lookup(variable,None)
  483. if localename:
  484. if variable == 'LANGUAGE':
  485. localename = localename.split(':')[0]
  486. break
  487. else:
  488. localename = 'C'
  489. return _parse_localename(localename)
  490. def getlocale(category=LC_CTYPE):
  491. """ Returns the current setting for the given locale category as
  492. tuple (language code, encoding).
  493. category may be one of the LC_* value except LC_ALL. It
  494. defaults to LC_CTYPE.
  495. Except for the code 'C', the language code corresponds to RFC
  496. 1766. code and encoding can be None in case the values cannot
  497. be determined.
  498. """
  499. localename = _setlocale(category)
  500. if category == LC_ALL and ';' in localename:
  501. raise TypeError('category LC_ALL is not supported')
  502. return _parse_localename(localename)
  503. def setlocale(category, locale=None):
  504. """ Set the locale for the given category. The locale can be
  505. a string, an iterable of two strings (language code and encoding),
  506. or None.
  507. Iterables are converted to strings using the locale aliasing
  508. engine. Locale strings are passed directly to the C lib.
  509. category may be given as one of the LC_* values.
  510. """
  511. if locale and not isinstance(locale, _builtin_str):
  512. # convert to string
  513. locale = normalize(_build_localename(locale))
  514. return _setlocale(category, locale)
  515. def resetlocale(category=LC_ALL):
  516. """ Sets the locale for category to the default setting.
  517. The default setting is determined by calling
  518. getdefaultlocale(). category defaults to LC_ALL.
  519. """
  520. _setlocale(category, _build_localename(getdefaultlocale()))
# getpreferredencoding() is defined once, at import time; which of the four
# variants below is chosen depends on the platform and on whether CODESET
# exists in this namespace.
if sys.platform.startswith("win"):
    # On Win32, this will return the ANSI code page
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using."""
        # do_setlocale is accepted but not used by this variant.
        if sys.flags.utf8_mode:
            return 'UTF-8'
        import _bootlocale
        return _bootlocale.getpreferredencoding(False)
else:
    # On Unix, if CODESET is available, use that.
    try:
        # Bare name lookup: raises NameError when CODESET is not defined.
        CODESET
    except NameError:
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            def getpreferredencoding(do_setlocale = True):
                return 'UTF-8'
        else:
            # Fall back to parsing environment variables :-(
            def getpreferredencoding(do_setlocale = True):
                """Return the charset that the user is likely using,
                by looking at environment variables."""
                # do_setlocale is accepted but not used by this variant.
                if sys.flags.utf8_mode:
                    return 'UTF-8'
                res = getdefaultlocale()[1]
                if res is None:
                    # LANG not set, default conservatively to ASCII
                    res = 'ascii'
                return res
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration."""
            if sys.flags.utf8_mode:
                return 'UTF-8'
            import _bootlocale
            if do_setlocale:
                # Temporarily switch LC_CTYPE to the user's default so the
                # lookup below reflects it; the old value is put back after.
                oldloc = setlocale(LC_CTYPE)
                try:
                    setlocale(LC_CTYPE, "")
                except Error:
                    # Keep the current LC_CTYPE if the default is rejected.
                    pass
            # NOTE(review): if this call raises, LC_CTYPE is not restored to
            # oldloc -- there is no try/finally around it.
            result = _bootlocale.getpreferredencoding(False)
            if do_setlocale:
                setlocale(LC_CTYPE, oldloc)
            return result
### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
#
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}

# Also register every key with its underscores removed (e.g. 'utf_8' ->
# 'utf8').  setdefault() leaves entries that already exist untouched, and
# sorted() takes a snapshot so the dict can be modified inside the loop.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
  635. #
  636. # The locale_alias table maps lowercase alias names to C locale names
  637. # (case-sensitive). Encodings are always separated from the locale
  638. # name using a dot ('.'); they should only be given in case the
  639. # language name is needed to interpret the given encoding alias
  640. # correctly (CJK codes often have this need).
  641. #
# Note that the normalize() function, which uses this table,
  643. # removes '_' and '-' characters from the encoding part of the
  644. # locale name before doing the lookup. This saves a lot of
  645. # space in the table.
  646. #
  647. # MAL 2004-12-10:
  648. # Updated alias mapping to most recent locale.alias file
  649. # from X.org distribution using makelocalealias.py.
  650. #
  651. # These are the differences compared to the old mapping (Python 2.4
  652. # and older):
  653. #
  654. # updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  655. # updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  656. # updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  657. # updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  658. # updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  659. # updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  660. # updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
  661. # updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  662. # updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  663. # updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  664. # updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  665. # updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  666. # updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  667. # updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
  668. # updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
  669. # updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
  670. # updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  671. # updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  672. # updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
  673. # updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
  674. # updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  675. # updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  676. #
  677. # MAL 2008-05-30:
  678. # Updated alias mapping to most recent locale.alias file
  679. # from X.org distribution using makelocalealias.py.
  680. #
  681. # These are the differences compared to the old mapping (Python 2.5
  682. # and older):
  683. #
  684. # updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
  685. # updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  686. # updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  687. # updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
  688. # updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  689. # updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  690. # updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  691. # updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  692. # updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  693. # updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  694. # updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
  695. # updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  696. # updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  697. # updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  698. # updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  699. # updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  700. # updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  701. # updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
  702. # updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  703. #
  704. # AP 2010-04-12:
  705. # Updated alias mapping to most recent locale.alias file
  706. # from X.org distribution using makelocalealias.py.
  707. #
  708. # These are the differences compared to the old mapping (Python 2.6.5
  709. # and older):
  710. #
  711. # updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  712. # updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  713. # updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  714. # updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  715. # updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  716. # updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  717. # updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  718. # updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  719. # updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
  720. # updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  721. # updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
  722. # updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
  723. # updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  724. #
  725. # SS 2013-12-20:
  726. # Updated alias mapping to most recent locale.alias file
  727. # from X.org distribution using makelocalealias.py.
  728. #
  729. # These are the differences compared to the old mapping (Python 3.3.3
  730. # and older):
  731. #
  732. # updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  733. # updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  734. # updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  735. # updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  736. # updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  737. # updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  738. # updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
  739. # updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  740. # updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
  741. # updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  742. # updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  743. #
  744. # SS 2014-10-01:
  745. # Updated alias mapping with glibc 2.19 supported locales.
  746. #
  747. # SS 2018-05-05:
  748. # Updated alias mapping with glibc 2.27 supported locales.
  749. #
  750. # These are the differences compared to the old mapping (Python 3.6.5
  751. # and older):
  752. #
  753. # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
  754. # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
  755. # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
  756. locale_alias = {
  757. 'a3': 'az_AZ.KOI8-C',
  758. 'a3_az': 'az_AZ.KOI8-C',
  759. 'a3_az.koic': 'az_AZ.KOI8-C',
  760. 'aa_dj': 'aa_DJ.ISO8859-1',
  761. 'aa_er': 'aa_ER.UTF-8',
  762. 'aa_et': 'aa_ET.UTF-8',
  763. 'af': 'af_ZA.ISO8859-1',
  764. 'af_za': 'af_ZA.ISO8859-1',
  765. 'agr_pe': 'agr_PE.UTF-8',
  766. 'ak_gh': 'ak_GH.UTF-8',
  767. 'am': 'am_ET.UTF-8',
  768. 'am_et': 'am_ET.UTF-8',
  769. 'american': 'en_US.ISO8859-1',
  770. 'an_es': 'an_ES.ISO8859-15',
  771. 'anp_in': 'anp_IN.UTF-8',
  772. 'ar': 'ar_AA.ISO8859-6',
  773. 'ar_aa': 'ar_AA.ISO8859-6',
  774. 'ar_ae': 'ar_AE.ISO8859-6',
  775. 'ar_bh': 'ar_BH.ISO8859-6',
  776. 'ar_dz': 'ar_DZ.ISO8859-6',
  777. 'ar_eg': 'ar_EG.ISO8859-6',
  778. 'ar_in': 'ar_IN.UTF-8',
  779. 'ar_iq': 'ar_IQ.ISO8859-6',
  780. 'ar_jo': 'ar_JO.ISO8859-6',
  781. 'ar_kw': 'ar_KW.ISO8859-6',
  782. 'ar_lb': 'ar_LB.ISO8859-6',
  783. 'ar_ly': 'ar_LY.ISO8859-6',
  784. 'ar_ma': 'ar_MA.ISO8859-6',
  785. 'ar_om': 'ar_OM.ISO8859-6',
  786. 'ar_qa': 'ar_QA.ISO8859-6',
  787. 'ar_sa': 'ar_SA.ISO8859-6',
  788. 'ar_sd': 'ar_SD.ISO8859-6',
  789. 'ar_ss': 'ar_SS.UTF-8',
  790. 'ar_sy': 'ar_SY.ISO8859-6',
  791. 'ar_tn': 'ar_TN.ISO8859-6',
  792. 'ar_ye': 'ar_YE.ISO8859-6',
  793. 'arabic': 'ar_AA.ISO8859-6',
  794. 'as': 'as_IN.UTF-8',
  795. 'as_in': 'as_IN.UTF-8',
  796. 'ast_es': 'ast_ES.ISO8859-15',
  797. 'ayc_pe': 'ayc_PE.UTF-8',
  798. 'az': 'az_AZ.ISO8859-9E',
  799. 'az_az': 'az_AZ.ISO8859-9E',
  800. 'az_az.iso88599e': 'az_AZ.ISO8859-9E',
  801. 'az_ir': 'az_IR.UTF-8',
  802. 'be': 'be_BY.CP1251',
  803. 'be@latin': 'be_BY.UTF-8@latin',
  804. 'be_bg.utf8': 'bg_BG.UTF-8',
  805. 'be_by': 'be_BY.CP1251',
  806. 'be_by@latin': 'be_BY.UTF-8@latin',
  807. 'bem_zm': 'bem_ZM.UTF-8',
  808. 'ber_dz': 'ber_DZ.UTF-8',
  809. 'ber_ma': 'ber_MA.UTF-8',
  810. 'bg': 'bg_BG.CP1251',
  811. 'bg_bg': 'bg_BG.CP1251',
  812. 'bhb_in.utf8': 'bhb_IN.UTF-8',
  813. 'bho_in': 'bho_IN.UTF-8',
  814. 'bho_np': 'bho_NP.UTF-8',
  815. 'bi_vu': 'bi_VU.UTF-8',
  816. 'bn_bd': 'bn_BD.UTF-8',
  817. 'bn_in': 'bn_IN.UTF-8',
  818. 'bo_cn': 'bo_CN.UTF-8',
  819. 'bo_in': 'bo_IN.UTF-8',
  820. 'bokmal': 'nb_NO.ISO8859-1',
  821. 'bokm\xe5l': 'nb_NO.ISO8859-1',
  822. 'br': 'br_FR.ISO8859-1',
  823. 'br_fr': 'br_FR.ISO8859-1',
  824. 'brx_in': 'brx_IN.UTF-8',
  825. 'bs': 'bs_BA.ISO8859-2',
  826. 'bs_ba': 'bs_BA.ISO8859-2',
  827. 'bulgarian': 'bg_BG.CP1251',
  828. 'byn_er': 'byn_ER.UTF-8',
  829. 'c': 'C',
  830. 'c-french': 'fr_CA.ISO8859-1',
  831. 'c.ascii': 'C',
  832. 'c.en': 'C',
  833. 'c.iso88591': 'en_US.ISO8859-1',
  834. 'c.utf8': 'en_US.UTF-8',
  835. 'c_c': 'C',
  836. 'c_c.c': 'C',
  837. 'ca': 'ca_ES.ISO8859-1',
  838. 'ca_ad': 'ca_AD.ISO8859-1',
  839. 'ca_es': 'ca_ES.ISO8859-1',
  840. 'ca_es@valencia': 'ca_ES.UTF-8@valencia',
  841. 'ca_fr': 'ca_FR.ISO8859-1',
  842. 'ca_it': 'ca_IT.ISO8859-1',
  843. 'catalan': 'ca_ES.ISO8859-1',
  844. 'ce_ru': 'ce_RU.UTF-8',
  845. 'cextend': 'en_US.ISO8859-1',
  846. 'chinese-s': 'zh_CN.eucCN',
  847. 'chinese-t': 'zh_TW.eucTW',
  848. 'chr_us': 'chr_US.UTF-8',
  849. 'ckb_iq': 'ckb_IQ.UTF-8',
  850. 'cmn_tw': 'cmn_TW.UTF-8',
  851. 'crh_ua': 'crh_UA.UTF-8',
  852. 'croatian': 'hr_HR.ISO8859-2',
  853. 'cs': 'cs_CZ.ISO8859-2',
  854. 'cs_cs': 'cs_CZ.ISO8859-2',
  855. 'cs_cz': 'cs_CZ.ISO8859-2',
  856. 'csb_pl': 'csb_PL.UTF-8',
  857. 'cv_ru': 'cv_RU.UTF-8',
  858. 'cy': 'cy_GB.ISO8859-1',
  859. 'cy_gb': 'cy_GB.ISO8859-1',
  860. 'cz': 'cs_CZ.ISO8859-2',
  861. 'cz_cz': 'cs_CZ.ISO8859-2',
  862. 'czech': 'cs_CZ.ISO8859-2',
  863. 'da': 'da_DK.ISO8859-1',
  864. 'da_dk': 'da_DK.ISO8859-1',
  865. 'danish': 'da_DK.ISO8859-1',
  866. 'dansk': 'da_DK.ISO8859-1',
  867. 'de': 'de_DE.ISO8859-1',
  868. 'de_at': 'de_AT.ISO8859-1',
  869. 'de_be': 'de_BE.ISO8859-1',
  870. 'de_ch': 'de_CH.ISO8859-1',
  871. 'de_de': 'de_DE.ISO8859-1',
  872. 'de_it': 'de_IT.ISO8859-1',
  873. 'de_li.utf8': 'de_LI.UTF-8',
  874. 'de_lu': 'de_LU.ISO8859-1',
  875. 'deutsch': 'de_DE.ISO8859-1',
  876. 'doi_in': 'doi_IN.UTF-8',
  877. 'dutch': 'nl_NL.ISO8859-1',
  878. 'dutch.iso88591': 'nl_BE.ISO8859-1',
  879. 'dv_mv': 'dv_MV.UTF-8',
  880. 'dz_bt': 'dz_BT.UTF-8',
  881. 'ee': 'ee_EE.ISO8859-4',
  882. 'ee_ee': 'ee_EE.ISO8859-4',
  883. 'eesti': 'et_EE.ISO8859-1',
  884. 'el': 'el_GR.ISO8859-7',
  885. 'el_cy': 'el_CY.ISO8859-7',
  886. 'el_gr': 'el_GR.ISO8859-7',
  887. 'el_gr@euro': 'el_GR.ISO8859-15',
  888. 'en': 'en_US.ISO8859-1',
  889. 'en_ag': 'en_AG.UTF-8',
  890. 'en_au': 'en_AU.ISO8859-1',
  891. 'en_be': 'en_BE.ISO8859-1',
  892. 'en_bw': 'en_BW.ISO8859-1',
  893. 'en_ca': 'en_CA.ISO8859-1',
  894. 'en_dk': 'en_DK.ISO8859-1',
  895. 'en_dl.utf8': 'en_DL.UTF-8',
  896. 'en_gb': 'en_GB.ISO8859-1',
  897. 'en_hk': 'en_HK.ISO8859-1',
  898. 'en_ie': 'en_IE.ISO8859-1',
  899. 'en_il': 'en_IL.UTF-8',
  900. 'en_in': 'en_IN.ISO8859-1',
  901. 'en_ng': 'en_NG.UTF-8',
  902. 'en_nz': 'en_NZ.ISO8859-1',
  903. 'en_ph': 'en_PH.ISO8859-1',
  904. 'en_sc.utf8': 'en_SC.UTF-8',
  905. 'en_sg': 'en_SG.ISO8859-1',
  906. 'en_uk': 'en_GB.ISO8859-1',
  907. 'en_us': 'en_US.ISO8859-1',
  908. 'en_us@euro@euro': 'en_US.ISO8859-15',
  909. 'en_za': 'en_ZA.ISO8859-1',
  910. 'en_zm': 'en_ZM.UTF-8',
  911. 'en_zw': 'en_ZW.ISO8859-1',
  912. 'en_zw.utf8': 'en_ZS.UTF-8',
  913. 'eng_gb': 'en_GB.ISO8859-1',
  914. 'english': 'en_EN.ISO8859-1',
  915. 'english.iso88591': 'en_US.ISO8859-1',
  916. 'english_uk': 'en_GB.ISO8859-1',
  917. 'english_united-states': 'en_US.ISO8859-1',
  918. 'english_united-states.437': 'C',
  919. 'english_us': 'en_US.ISO8859-1',
  920. 'eo': 'eo_XX.ISO8859-3',
  921. 'eo.utf8': 'eo.UTF-8',
  922. 'eo_eo': 'eo_EO.ISO8859-3',
  923. 'eo_us.utf8': 'eo_US.UTF-8',
  924. 'eo_xx': 'eo_XX.ISO8859-3',
  925. 'es': 'es_ES.ISO8859-1',
  926. 'es_ar': 'es_AR.ISO8859-1',
  927. 'es_bo': 'es_BO.ISO8859-1',
  928. 'es_cl': 'es_CL.ISO8859-1',
  929. 'es_co': 'es_CO.ISO8859-1',
  930. 'es_cr': 'es_CR.ISO8859-1',
  931. 'es_cu': 'es_CU.UTF-8',
  932. 'es_do': 'es_DO.ISO8859-1',
  933. 'es_ec': 'es_EC.ISO8859-1',
  934. 'es_es': 'es_ES.ISO8859-1',
  935. 'es_gt': 'es_GT.ISO8859-1',
  936. 'es_hn': 'es_HN.ISO8859-1',
  937. 'es_mx': 'es_MX.ISO8859-1',
  938. 'es_ni': 'es_NI.ISO8859-1',
  939. 'es_pa': 'es_PA.ISO8859-1',
  940. 'es_pe': 'es_PE.ISO8859-1',
  941. 'es_pr': 'es_PR.ISO8859-1',
  942. 'es_py': 'es_PY.ISO8859-1',
  943. 'es_sv': 'es_SV.ISO8859-1',
  944. 'es_us': 'es_US.ISO8859-1',
  945. 'es_uy': 'es_UY.ISO8859-1',
  946. 'es_ve': 'es_VE.ISO8859-1',
  947. 'estonian': 'et_EE.ISO8859-1',
  948. 'et': 'et_EE.ISO8859-15',
  949. 'et_ee': 'et_EE.ISO8859-15',
  950. 'eu': 'eu_ES.ISO8859-1',
  951. 'eu_es': 'eu_ES.ISO8859-1',
  952. 'eu_fr': 'eu_FR.ISO8859-1',
  953. 'fa': 'fa_IR.UTF-8',
  954. 'fa_ir': 'fa_IR.UTF-8',
  955. 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
  956. 'ff_sn': 'ff_SN.UTF-8',
  957. 'fi': 'fi_FI.ISO8859-15',
  958. 'fi_fi': 'fi_FI.ISO8859-15',
  959. 'fil_ph': 'fil_PH.UTF-8',
  960. 'finnish': 'fi_FI.ISO8859-1',
  961. 'fo': 'fo_FO.ISO8859-1',
  962. 'fo_fo': 'fo_FO.ISO8859-1',
  963. 'fr': 'fr_FR.ISO8859-1',
  964. 'fr_be': 'fr_BE.ISO8859-1',
  965. 'fr_ca': 'fr_CA.ISO8859-1',
  966. 'fr_ch': 'fr_CH.ISO8859-1',
  967. 'fr_fr': 'fr_FR.ISO8859-1',
  968. 'fr_lu': 'fr_LU.ISO8859-1',
  969. 'fran\xe7ais': 'fr_FR.ISO8859-1',
  970. 'fre_fr': 'fr_FR.ISO8859-1',
  971. 'french': 'fr_FR.ISO8859-1',
  972. 'french.iso88591': 'fr_CH.ISO8859-1',
  973. 'french_france': 'fr_FR.ISO8859-1',
  974. 'fur_it': 'fur_IT.UTF-8',
  975. 'fy_de': 'fy_DE.UTF-8',
  976. 'fy_nl': 'fy_NL.UTF-8',
  977. 'ga': 'ga_IE.ISO8859-1',
  978. 'ga_ie': 'ga_IE.ISO8859-1',
  979. 'galego': 'gl_ES.ISO8859-1',
  980. 'galician': 'gl_ES.ISO8859-1',
  981. 'gd': 'gd_GB.ISO8859-1',
  982. 'gd_gb': 'gd_GB.ISO8859-1',
  983. 'ger_de': 'de_DE.ISO8859-1',
  984. 'german': 'de_DE.ISO8859-1',
  985. 'german.iso88591': 'de_CH.ISO8859-1',
  986. 'german_germany': 'de_DE.ISO8859-1',
  987. 'gez_er': 'gez_ER.UTF-8',
  988. 'gez_et': 'gez_ET.UTF-8',
  989. 'gl': 'gl_ES.ISO8859-1',
  990. 'gl_es': 'gl_ES.ISO8859-1',
  991. 'greek': 'el_GR.ISO8859-7',
  992. 'gu_in': 'gu_IN.UTF-8',
  993. 'gv': 'gv_GB.ISO8859-1',
  994. 'gv_gb': 'gv_GB.ISO8859-1',
  995. 'ha_ng': 'ha_NG.UTF-8',
  996. 'hak_tw': 'hak_TW.UTF-8',
  997. 'he': 'he_IL.ISO8859-8',
  998. 'he_il': 'he_IL.ISO8859-8',
  999. 'hebrew': 'he_IL.ISO8859-8',
  1000. 'hi': 'hi_IN.ISCII-DEV',
  1001. 'hi_in': 'hi_IN.ISCII-DEV',
  1002. 'hi_in.isciidev': 'hi_IN.ISCII-DEV',
  1003. 'hif_fj': 'hif_FJ.UTF-8',
  1004. 'hne': 'hne_IN.UTF-8',
  1005. 'hne_in': 'hne_IN.UTF-8',
  1006. 'hr': 'hr_HR.ISO8859-2',
  1007. 'hr_hr': 'hr_HR.ISO8859-2',
  1008. 'hrvatski': 'hr_HR.ISO8859-2',
  1009. 'hsb_de': 'hsb_DE.ISO8859-2',
  1010. 'ht_ht': 'ht_HT.UTF-8',
  1011. 'hu': 'hu_HU.ISO8859-2',
  1012. 'hu_hu': 'hu_HU.ISO8859-2',
  1013. 'hungarian': 'hu_HU.ISO8859-2',
  1014. 'hy_am': 'hy_AM.UTF-8',
  1015. 'hy_am.armscii8': 'hy_AM.ARMSCII_8',
  1016. 'ia': 'ia.UTF-8',
  1017. 'ia_fr': 'ia_FR.UTF-8',
  1018. 'icelandic': 'is_IS.ISO8859-1',
  1019. 'id': 'id_ID.ISO8859-1',
  1020. 'id_id': 'id_ID.ISO8859-1',
  1021. 'ig_ng': 'ig_NG.UTF-8',
  1022. 'ik_ca': 'ik_CA.UTF-8',
  1023. 'in': 'id_ID.ISO8859-1',
  1024. 'in_id': 'id_ID.ISO8859-1',
  1025. 'is': 'is_IS.ISO8859-1',
  1026. 'is_is': 'is_IS.ISO8859-1',
  1027. 'iso-8859-1': 'en_US.ISO8859-1',
  1028. 'iso-8859-15': 'en_US.ISO8859-15',
  1029. 'iso8859-1': 'en_US.ISO8859-1',
  1030. 'iso8859-15': 'en_US.ISO8859-15',
  1031. 'iso_8859_1': 'en_US.ISO8859-1',
  1032. 'iso_8859_15': 'en_US.ISO8859-15',
  1033. 'it': 'it_IT.ISO8859-1',
  1034. 'it_ch': 'it_CH.ISO8859-1',
  1035. 'it_it': 'it_IT.ISO8859-1',
  1036. 'italian': 'it_IT.ISO8859-1',
  1037. 'iu': 'iu_CA.NUNACOM-8',
  1038. 'iu_ca': 'iu_CA.NUNACOM-8',
  1039. 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
  1040. 'iw': 'he_IL.ISO8859-8',
  1041. 'iw_il': 'he_IL.ISO8859-8',
  1042. 'iw_il.utf8': 'iw_IL.UTF-8',
  1043. 'ja': 'ja_JP.eucJP',
  1044. 'ja_jp': 'ja_JP.eucJP',
  1045. 'ja_jp.euc': 'ja_JP.eucJP',
  1046. 'ja_jp.mscode': 'ja_JP.SJIS',
  1047. 'ja_jp.pck': 'ja_JP.SJIS',
  1048. 'japan': 'ja_JP.eucJP',
  1049. 'japanese': 'ja_JP.eucJP',
  1050. 'japanese-euc': 'ja_JP.eucJP',
  1051. 'japanese.euc': 'ja_JP.eucJP',
  1052. 'jp_jp': 'ja_JP.eucJP',
  1053. 'ka': 'ka_GE.GEORGIAN-ACADEMY',
  1054. 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
  1055. 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
  1056. 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
  1057. 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
  1058. 'kab_dz': 'kab_DZ.UTF-8',
  1059. 'kk_kz': 'kk_KZ.ptcp154',
  1060. 'kl': 'kl_GL.ISO8859-1',
  1061. 'kl_gl': 'kl_GL.ISO8859-1',
  1062. 'km_kh': 'km_KH.UTF-8',
  1063. 'kn': 'kn_IN.UTF-8',
  1064. 'kn_in': 'kn_IN.UTF-8',
  1065. 'ko': 'ko_KR.eucKR',
  1066. 'ko_kr': 'ko_KR.eucKR',
  1067. 'ko_kr.euc': 'ko_KR.eucKR',
  1068. 'kok_in': 'kok_IN.UTF-8',
  1069. 'korean': 'ko_KR.eucKR',
  1070. 'korean.euc': 'ko_KR.eucKR',
  1071. 'ks': 'ks_IN.UTF-8',
  1072. 'ks_in': 'ks_IN.UTF-8',
  1073. 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari',
  1074. 'ku_tr': 'ku_TR.ISO8859-9',
  1075. 'kw': 'kw_GB.ISO8859-1',
  1076. 'kw_gb': 'kw_GB.ISO8859-1',
  1077. 'ky': 'ky_KG.UTF-8',
  1078. 'ky_kg': 'ky_KG.UTF-8',
  1079. 'lb_lu': 'lb_LU.UTF-8',
  1080. 'lg_ug': 'lg_UG.ISO8859-10',
  1081. 'li_be': 'li_BE.UTF-8',
  1082. 'li_nl': 'li_NL.UTF-8',
  1083. 'lij_it': 'lij_IT.UTF-8',
  1084. 'lithuanian': 'lt_LT.ISO8859-13',
  1085. 'ln_cd': 'ln_CD.UTF-8',
  1086. 'lo': 'lo_LA.MULELAO-1',
  1087. 'lo_la': 'lo_LA.MULELAO-1',
  1088. 'lo_la.cp1133': 'lo_LA.IBM-CP1133',
  1089. 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
  1090. 'lo_la.mulelao1': 'lo_LA.MULELAO-1',
  1091. 'lt': 'lt_LT.ISO8859-13',
  1092. 'lt_lt': 'lt_LT.ISO8859-13',
  1093. 'lv': 'lv_LV.ISO8859-13',
  1094. 'lv_lv': 'lv_LV.ISO8859-13',
  1095. 'lzh_tw': 'lzh_TW.UTF-8',
  1096. 'mag_in': 'mag_IN.UTF-8',
  1097. 'mai': 'mai_IN.UTF-8',
  1098. 'mai_in': 'mai_IN.UTF-8',
  1099. 'mai_np': 'mai_NP.UTF-8',
  1100. 'mfe_mu': 'mfe_MU.UTF-8',
  1101. 'mg_mg': 'mg_MG.ISO8859-15',
  1102. 'mhr_ru': 'mhr_RU.UTF-8',
  1103. 'mi': 'mi_NZ.ISO8859-1',
  1104. 'mi_nz': 'mi_NZ.ISO8859-1',
  1105. 'miq_ni': 'miq_NI.UTF-8',
  1106. 'mjw_in': 'mjw_IN.UTF-8',
  1107. 'mk': 'mk_MK.ISO8859-5',
  1108. 'mk_mk': 'mk_MK.ISO8859-5',
  1109. 'ml': 'ml_IN.UTF-8',
  1110. 'ml_in': 'ml_IN.UTF-8',
  1111. 'mn_mn': 'mn_MN.UTF-8',
  1112. 'mni_in': 'mni_IN.UTF-8',
  1113. 'mr': 'mr_IN.UTF-8',
  1114. 'mr_in': 'mr_IN.UTF-8',
  1115. 'ms': 'ms_MY.ISO8859-1',
  1116. 'ms_my': 'ms_MY.ISO8859-1',
  1117. 'mt': 'mt_MT.ISO8859-3',
  1118. 'mt_mt': 'mt_MT.ISO8859-3',
  1119. 'my_mm': 'my_MM.UTF-8',
  1120. 'nan_tw': 'nan_TW.UTF-8',
  1121. 'nb': 'nb_NO.ISO8859-1',
  1122. 'nb_no': 'nb_NO.ISO8859-1',
  1123. 'nds_de': 'nds_DE.UTF-8',
  1124. 'nds_nl': 'nds_NL.UTF-8',
  1125. 'ne_np': 'ne_NP.UTF-8',
  1126. 'nhn_mx': 'nhn_MX.UTF-8',
  1127. 'niu_nu': 'niu_NU.UTF-8',
  1128. 'niu_nz': 'niu_NZ.UTF-8',
  1129. 'nl': 'nl_NL.ISO8859-1',
  1130. 'nl_aw': 'nl_AW.UTF-8',
  1131. 'nl_be': 'nl_BE.ISO8859-1',
  1132. 'nl_nl': 'nl_NL.ISO8859-1',
  1133. 'nn': 'nn_NO.ISO8859-1',
  1134. 'nn_no': 'nn_NO.ISO8859-1',
  1135. 'no': 'no_NO.ISO8859-1',
  1136. 'no@nynorsk': 'ny_NO.ISO8859-1',
  1137. 'no_no': 'no_NO.ISO8859-1',
  1138. 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1',
  1139. 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1',
  1140. 'norwegian': 'no_NO.ISO8859-1',
  1141. 'nr': 'nr_ZA.ISO8859-1',
  1142. 'nr_za': 'nr_ZA.ISO8859-1',
  1143. 'nso': 'nso_ZA.ISO8859-15',
  1144. 'nso_za': 'nso_ZA.ISO8859-15',
  1145. 'ny': 'ny_NO.ISO8859-1',
  1146. 'ny_no': 'ny_NO.ISO8859-1',
  1147. 'nynorsk': 'nn_NO.ISO8859-1',
  1148. 'oc': 'oc_FR.ISO8859-1',
  1149. 'oc_fr': 'oc_FR.ISO8859-1',
  1150. 'om_et': 'om_ET.UTF-8',
  1151. 'om_ke': 'om_KE.ISO8859-1',
  1152. 'or': 'or_IN.UTF-8',
  1153. 'or_in': 'or_IN.UTF-8',
  1154. 'os_ru': 'os_RU.UTF-8',
  1155. 'pa': 'pa_IN.UTF-8',
  1156. 'pa_in': 'pa_IN.UTF-8',
  1157. 'pa_pk': 'pa_PK.UTF-8',
  1158. 'pap_an': 'pap_AN.UTF-8',
  1159. 'pap_aw': 'pap_AW.UTF-8',
  1160. 'pap_cw': 'pap_CW.UTF-8',
  1161. 'pd': 'pd_US.ISO8859-1',
  1162. 'pd_de': 'pd_DE.ISO8859-1',
  1163. 'pd_us': 'pd_US.ISO8859-1',
  1164. 'ph': 'ph_PH.ISO8859-1',
  1165. 'ph_ph': 'ph_PH.ISO8859-1',
  1166. 'pl': 'pl_PL.ISO8859-2',
  1167. 'pl_pl': 'pl_PL.ISO8859-2',
  1168. 'polish': 'pl_PL.ISO8859-2',
  1169. 'portuguese': 'pt_PT.ISO8859-1',
  1170. 'portuguese_brazil': 'pt_BR.ISO8859-1',
  1171. 'posix': 'C',
  1172. 'posix-utf2': 'C',
  1173. 'pp': 'pp_AN.ISO8859-1',
  1174. 'pp_an': 'pp_AN.ISO8859-1',
  1175. 'ps_af': 'ps_AF.UTF-8',
  1176. 'pt': 'pt_PT.ISO8859-1',
  1177. 'pt_br': 'pt_BR.ISO8859-1',
  1178. 'pt_pt': 'pt_PT.ISO8859-1',
  1179. 'quz_pe': 'quz_PE.UTF-8',
  1180. 'raj_in': 'raj_IN.UTF-8',
  1181. 'ro': 'ro_RO.ISO8859-2',
  1182. 'ro_ro': 'ro_RO.ISO8859-2',
  1183. 'romanian': 'ro_RO.ISO8859-2',
  1184. 'ru': 'ru_RU.UTF-8',
  1185. 'ru_ru': 'ru_RU.UTF-8',
  1186. 'ru_ua': 'ru_UA.KOI8-U',
  1187. 'rumanian': 'ro_RO.ISO8859-2',
  1188. 'russian': 'ru_RU.KOI8-R',
  1189. 'rw': 'rw_RW.ISO8859-1',
  1190. 'rw_rw': 'rw_RW.ISO8859-1',
  1191. 'sa_in': 'sa_IN.UTF-8',
  1192. 'sat_in': 'sat_IN.UTF-8',
  1193. 'sc_it': 'sc_IT.UTF-8',
  1194. 'sd': 'sd_IN.UTF-8',
  1195. 'sd_in': 'sd_IN.UTF-8',
  1196. 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari',
  1197. 'sd_pk': 'sd_PK.UTF-8',
  1198. 'se_no': 'se_NO.UTF-8',
  1199. 'serbocroatian': 'sr_RS.UTF-8@latin',
  1200. 'sgs_lt': 'sgs_LT.UTF-8',
  1201. 'sh': 'sr_RS.UTF-8@latin',
  1202. 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
  1203. 'sh_hr': 'sh_HR.ISO8859-2',
  1204. 'sh_hr.iso88592': 'hr_HR.ISO8859-2',
  1205. 'sh_sp': 'sr_CS.ISO8859-2',
  1206. 'sh_yu': 'sr_RS.UTF-8@latin',
  1207. 'shn_mm': 'shn_MM.UTF-8',
  1208. 'shs_ca': 'shs_CA.UTF-8',
  1209. 'si': 'si_LK.UTF-8',
  1210. 'si_lk': 'si_LK.UTF-8',
  1211. 'sid_et': 'sid_ET.UTF-8',
  1212. 'sinhala': 'si_LK.UTF-8',
  1213. 'sk': 'sk_SK.ISO8859-2',
  1214. 'sk_sk': 'sk_SK.ISO8859-2',
  1215. 'sl': 'sl_SI.ISO8859-2',
  1216. 'sl_cs': 'sl_CS.ISO8859-2',
  1217. 'sl_si': 'sl_SI.ISO8859-2',
  1218. 'slovak': 'sk_SK.ISO8859-2',
  1219. 'slovene': 'sl_SI.ISO8859-2',
  1220. 'slovenian': 'sl_SI.ISO8859-2',
  1221. 'sm_ws': 'sm_WS.UTF-8',
  1222. 'so_dj': 'so_DJ.ISO8859-1',
  1223. 'so_et': 'so_ET.UTF-8',
  1224. 'so_ke': 'so_KE.ISO8859-1',
  1225. 'so_so': 'so_SO.ISO8859-1',
  1226. 'sp': 'sr_CS.ISO8859-5',
  1227. 'sp_yu': 'sr_CS.ISO8859-5',
  1228. 'spanish': 'es_ES.ISO8859-1',
  1229. 'spanish_spain': 'es_ES.ISO8859-1',
  1230. 'sq': 'sq_AL.ISO8859-2',
  1231. 'sq_al': 'sq_AL.ISO8859-2',
  1232. 'sq_mk': 'sq_MK.UTF-8',
  1233. 'sr': 'sr_RS.UTF-8',
  1234. 'sr@cyrillic': 'sr_RS.UTF-8',
  1235. 'sr@latn': 'sr_CS.UTF-8@latin',
  1236. 'sr_cs': 'sr_CS.UTF-8',
  1237. 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
  1238. 'sr_cs@latn': 'sr_CS.UTF-8@latin',
  1239. 'sr_me': 'sr_ME.UTF-8',
  1240. 'sr_rs': 'sr_RS.UTF-8',
  1241. 'sr_rs@latn': 'sr_RS.UTF-8@latin',
  1242. 'sr_sp': 'sr_CS.ISO8859-2',
  1243. 'sr_yu': 'sr_RS.UTF-8@latin',
  1244. 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251',
  1245. 'sr_yu.iso88592': 'sr_CS.ISO8859-2',
  1246. 'sr_yu.iso88595': 'sr_CS.ISO8859-5',
  1247. 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5',
  1248. 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251',
  1249. 'sr_yu.utf8': 'sr_RS.UTF-8',
  1250. 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8',
  1251. 'sr_yu@cyrillic': 'sr_RS.UTF-8',
  1252. 'ss': 'ss_ZA.ISO8859-1',
  1253. 'ss_za': 'ss_ZA.ISO8859-1',
  1254. 'st': 'st_ZA.ISO8859-1',
  1255. 'st_za': 'st_ZA.ISO8859-1',
  1256. 'sv': 'sv_SE.ISO8859-1',
  1257. 'sv_fi': 'sv_FI.ISO8859-1',
  1258. 'sv_se': 'sv_SE.ISO8859-1',
  1259. 'sw_ke': 'sw_KE.UTF-8',
  1260. 'sw_tz': 'sw_TZ.UTF-8',
  1261. 'swedish': 'sv_SE.ISO8859-1',
  1262. 'szl_pl': 'szl_PL.UTF-8',
  1263. 'ta': 'ta_IN.TSCII-0',
  1264. 'ta_in': 'ta_IN.TSCII-0',
  1265. 'ta_in.tscii': 'ta_IN.TSCII-0',
  1266. 'ta_in.tscii0': 'ta_IN.TSCII-0',
  1267. 'ta_lk': 'ta_LK.UTF-8',
  1268. 'tcy_in.utf8': 'tcy_IN.UTF-8',
  1269. 'te': 'te_IN.UTF-8',
  1270. 'te_in': 'te_IN.UTF-8',
  1271. 'tg': 'tg_TJ.KOI8-C',
  1272. 'tg_tj': 'tg_TJ.KOI8-C',
  1273. 'th': 'th_TH.ISO8859-11',
  1274. 'th_th': 'th_TH.ISO8859-11',
  1275. 'th_th.tactis': 'th_TH.TIS620',
  1276. 'th_th.tis620': 'th_TH.TIS620',
  1277. 'thai': 'th_TH.ISO8859-11',
  1278. 'the_np': 'the_NP.UTF-8',
  1279. 'ti_er': 'ti_ER.UTF-8',
  1280. 'ti_et': 'ti_ET.UTF-8',
  1281. 'tig_er': 'tig_ER.UTF-8',
  1282. 'tk_tm': 'tk_TM.UTF-8',
  1283. 'tl': 'tl_PH.ISO8859-1',
  1284. 'tl_ph': 'tl_PH.ISO8859-1',
  1285. 'tn': 'tn_ZA.ISO8859-15',
  1286. 'tn_za': 'tn_ZA.ISO8859-15',
  1287. 'to_to': 'to_TO.UTF-8',
  1288. 'tpi_pg': 'tpi_PG.UTF-8',
  1289. 'tr': 'tr_TR.ISO8859-9',
  1290. 'tr_cy': 'tr_CY.ISO8859-9',
  1291. 'tr_tr': 'tr_TR.ISO8859-9',
  1292. 'ts': 'ts_ZA.ISO8859-1',
  1293. 'ts_za': 'ts_ZA.ISO8859-1',
  1294. 'tt': 'tt_RU.TATAR-CYR',
  1295. 'tt_ru': 'tt_RU.TATAR-CYR',
  1296. 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
  1297. 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif',
  1298. 'turkish': 'tr_TR.ISO8859-9',
  1299. 'ug_cn': 'ug_CN.UTF-8',
  1300. 'uk': 'uk_UA.KOI8-U',
  1301. 'uk_ua': 'uk_UA.KOI8-U',
  1302. 'univ': 'en_US.utf',
  1303. 'universal': 'en_US.utf',
  1304. 'universal.utf8@ucs4': 'en_US.UTF-8',
  1305. 'unm_us': 'unm_US.UTF-8',
  1306. 'ur': 'ur_PK.CP1256',
  1307. 'ur_in': 'ur_IN.UTF-8',
  1308. 'ur_pk': 'ur_PK.CP1256',
  1309. 'uz': 'uz_UZ.UTF-8',
  1310. 'uz_uz': 'uz_UZ.UTF-8',
  1311. 'uz_uz@cyrillic': 'uz_UZ.UTF-8',
  1312. 've': 've_ZA.UTF-8',
  1313. 've_za': 've_ZA.UTF-8',
  1314. 'vi': 'vi_VN.TCVN',
  1315. 'vi_vn': 'vi_VN.TCVN',
  1316. 'vi_vn.tcvn': 'vi_VN.TCVN',
  1317. 'vi_vn.tcvn5712': 'vi_VN.TCVN',
  1318. 'vi_vn.viscii': 'vi_VN.VISCII',
  1319. 'vi_vn.viscii111': 'vi_VN.VISCII',
  1320. 'wa': 'wa_BE.ISO8859-1',
  1321. 'wa_be': 'wa_BE.ISO8859-1',
  1322. 'wae_ch': 'wae_CH.UTF-8',
  1323. 'wal_et': 'wal_ET.UTF-8',
  1324. 'wo_sn': 'wo_SN.UTF-8',
  1325. 'xh': 'xh_ZA.ISO8859-1',
  1326. 'xh_za': 'xh_ZA.ISO8859-1',
  1327. 'yi': 'yi_US.CP1255',
  1328. 'yi_us': 'yi_US.CP1255',
  1329. 'yo_ng': 'yo_NG.UTF-8',
  1330. 'yue_hk': 'yue_HK.UTF-8',
  1331. 'yuw_pg': 'yuw_PG.UTF-8',
  1332. 'zh': 'zh_CN.eucCN',
  1333. 'zh_cn': 'zh_CN.gb2312',
  1334. 'zh_cn.big5': 'zh_TW.big5',
  1335. 'zh_cn.euc': 'zh_CN.eucCN',
  1336. 'zh_hk': 'zh_HK.big5hkscs',
  1337. 'zh_hk.big5hk': 'zh_HK.big5hkscs',
  1338. 'zh_sg': 'zh_SG.GB2312',
  1339. 'zh_sg.gbk': 'zh_SG.GBK',
  1340. 'zh_tw': 'zh_TW.big5',
  1341. 'zh_tw.euc': 'zh_TW.eucTW',
  1342. 'zh_tw.euctw': 'zh_TW.eucTW',
  1343. 'zu': 'zu_ZA.ISO8859-1',
  1344. 'zu_za': 'zu_ZA.ISO8859-1',
  1345. }
  1346. #
  1347. # This maps Windows language identifiers to locale strings.
  1348. #
  1349. # This list has been updated from
  1350. # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
  1351. # to include every locale up to Windows Vista.
  1352. #
  1353. # NOTE: this mapping is incomplete. If your language is missing, please
  1354. # submit a bug report to the Python bug tracker at http://bugs.python.org/
  1355. # Make sure you include the missing language identifier and the suggested
  1356. # locale code.
  1357. #
  1358. windows_locale = {
  1359. 0x0436: "af_ZA", # Afrikaans
  1360. 0x041c: "sq_AL", # Albanian
  1361. 0x0484: "gsw_FR",# Alsatian - France
  1362. 0x045e: "am_ET", # Amharic - Ethiopia
  1363. 0x0401: "ar_SA", # Arabic - Saudi Arabia
  1364. 0x0801: "ar_IQ", # Arabic - Iraq
  1365. 0x0c01: "ar_EG", # Arabic - Egypt
  1366. 0x1001: "ar_LY", # Arabic - Libya
  1367. 0x1401: "ar_DZ", # Arabic - Algeria
  1368. 0x1801: "ar_MA", # Arabic - Morocco
  1369. 0x1c01: "ar_TN", # Arabic - Tunisia
  1370. 0x2001: "ar_OM", # Arabic - Oman
  1371. 0x2401: "ar_YE", # Arabic - Yemen
  1372. 0x2801: "ar_SY", # Arabic - Syria
  1373. 0x2c01: "ar_JO", # Arabic - Jordan
  1374. 0x3001: "ar_LB", # Arabic - Lebanon
  1375. 0x3401: "ar_KW", # Arabic - Kuwait
  1376. 0x3801: "ar_AE", # Arabic - United Arab Emirates
  1377. 0x3c01: "ar_BH", # Arabic - Bahrain
  1378. 0x4001: "ar_QA", # Arabic - Qatar
  1379. 0x042b: "hy_AM", # Armenian
  1380. 0x044d: "as_IN", # Assamese - India
  1381. 0x042c: "az_AZ", # Azeri - Latin
  1382. 0x082c: "az_AZ", # Azeri - Cyrillic
  1383. 0x046d: "ba_RU", # Bashkir
  1384. 0x042d: "eu_ES", # Basque - Russia
  1385. 0x0423: "be_BY", # Belarusian
  1386. 0x0445: "bn_IN", # Begali
  1387. 0x201a: "bs_BA", # Bosnian - Cyrillic
  1388. 0x141a: "bs_BA", # Bosnian - Latin
  1389. 0x047e: "br_FR", # Breton - France
  1390. 0x0402: "bg_BG", # Bulgarian
  1391. # 0x0455: "my_MM", # Burmese - Not supported
  1392. 0x0403: "ca_ES", # Catalan
  1393. 0x0004: "zh_CHS",# Chinese - Simplified
  1394. 0x0404: "zh_TW", # Chinese - Taiwan
  1395. 0x0804: "zh_CN", # Chinese - PRC
  1396. 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
  1397. 0x1004: "zh_SG", # Chinese - Singapore
  1398. 0x1404: "zh_MO", # Chinese - Macao S.A.R.
  1399. 0x7c04: "zh_CHT",# Chinese - Traditional
  1400. 0x0483: "co_FR", # Corsican - France
  1401. 0x041a: "hr_HR", # Croatian
  1402. 0x101a: "hr_BA", # Croatian - Bosnia
  1403. 0x0405: "cs_CZ", # Czech
  1404. 0x0406: "da_DK", # Danish
  1405. 0x048c: "gbz_AF",# Dari - Afghanistan
  1406. 0x0465: "div_MV",# Divehi - Maldives
  1407. 0x0413: "nl_NL", # Dutch - The Netherlands
  1408. 0x0813: "nl_BE", # Dutch - Belgium
  1409. 0x0409: "en_US", # English - United States
  1410. 0x0809: "en_GB", # English - United Kingdom
  1411. 0x0c09: "en_AU", # English - Australia
  1412. 0x1009: "en_CA", # English - Canada
  1413. 0x1409: "en_NZ", # English - New Zealand
  1414. 0x1809: "en_IE", # English - Ireland
  1415. 0x1c09: "en_ZA", # English - South Africa
  1416. 0x2009: "en_JA", # English - Jamaica
  1417. 0x2409: "en_CB", # English - Caribbean
  1418. 0x2809: "en_BZ", # English - Belize
  1419. 0x2c09: "en_TT", # English - Trinidad
  1420. 0x3009: "en_ZW", # English - Zimbabwe
  1421. 0x3409: "en_PH", # English - Philippines
  1422. 0x4009: "en_IN", # English - India
  1423. 0x4409: "en_MY", # English - Malaysia
  1424. 0x4809: "en_IN", # English - Singapore
  1425. 0x0425: "et_EE", # Estonian
  1426. 0x0438: "fo_FO", # Faroese
  1427. 0x0464: "fil_PH",# Filipino
  1428. 0x040b: "fi_FI", # Finnish
  1429. 0x040c: "fr_FR", # French - France
  1430. 0x080c: "fr_BE", # French - Belgium
  1431. 0x0c0c: "fr_CA", # French - Canada
  1432. 0x100c: "fr_CH", # French - Switzerland
  1433. 0x140c: "fr_LU", # French - Luxembourg
  1434. 0x180c: "fr_MC", # French - Monaco
  1435. 0x0462: "fy_NL", # Frisian - Netherlands
  1436. 0x0456: "gl_ES", # Galician
  1437. 0x0437: "ka_GE", # Georgian
  1438. 0x0407: "de_DE", # German - Germany
  1439. 0x0807: "de_CH", # German - Switzerland
  1440. 0x0c07: "de_AT", # German - Austria
  1441. 0x1007: "de_LU", # German - Luxembourg
  1442. 0x1407: "de_LI", # German - Liechtenstein
  1443. 0x0408: "el_GR", # Greek
  1444. 0x046f: "kl_GL", # Greenlandic - Greenland
  1445. 0x0447: "gu_IN", # Gujarati
  1446. 0x0468: "ha_NG", # Hausa - Latin
  1447. 0x040d: "he_IL", # Hebrew
  1448. 0x0439: "hi_IN", # Hindi
  1449. 0x040e: "hu_HU", # Hungarian
  1450. 0x040f: "is_IS", # Icelandic
  1451. 0x0421: "id_ID", # Indonesian
  1452. 0x045d: "iu_CA", # Inuktitut - Syllabics
  1453. 0x085d: "iu_CA", # Inuktitut - Latin
  1454. 0x083c: "ga_IE", # Irish - Ireland
  1455. 0x0410: "it_IT", # Italian - Italy
  1456. 0x0810: "it_CH", # Italian - Switzerland
  1457. 0x0411: "ja_JP", # Japanese
  1458. 0x044b: "kn_IN", # Kannada - India
  1459. 0x043f: "kk_KZ", # Kazakh
  1460. 0x0453: "kh_KH", # Khmer - Cambodia
  1461. 0x0486: "qut_GT",# K'iche - Guatemala
  1462. 0x0487: "rw_RW", # Kinyarwanda - Rwanda
  1463. 0x0457: "kok_IN",# Konkani
  1464. 0x0412: "ko_KR", # Korean
  1465. 0x0440: "ky_KG", # Kyrgyz
  1466. 0x0454: "lo_LA", # Lao - Lao PDR
  1467. 0x0426: "lv_LV", # Latvian
  1468. 0x0427: "lt_LT", # Lithuanian
  1469. 0x082e: "dsb_DE",# Lower Sorbian - Germany
  1470. 0x046e: "lb_LU", # Luxembourgish
  1471. 0x042f: "mk_MK", # FYROM Macedonian
  1472. 0x043e: "ms_MY", # Malay - Malaysia
  1473. 0x083e: "ms_BN", # Malay - Brunei Darussalam
  1474. 0x044c: "ml_IN", # Malayalam - India
  1475. 0x043a: "mt_MT", # Maltese
  1476. 0x0481: "mi_NZ", # Maori
  1477. 0x047a: "arn_CL",# Mapudungun
  1478. 0x044e: "mr_IN", # Marathi
  1479. 0x047c: "moh_CA",# Mohawk - Canada
  1480. 0x0450: "mn_MN", # Mongolian - Cyrillic
  1481. 0x0850: "mn_CN", # Mongolian - PRC
  1482. 0x0461: "ne_NP", # Nepali
  1483. 0x0414: "nb_NO", # Norwegian - Bokmal
  1484. 0x0814: "nn_NO", # Norwegian - Nynorsk
  1485. 0x0482: "oc_FR", # Occitan - France
  1486. 0x0448: "or_IN", # Oriya - India
  1487. 0x0463: "ps_AF", # Pashto - Afghanistan
  1488. 0x0429: "fa_IR", # Persian
  1489. 0x0415: "pl_PL", # Polish
  1490. 0x0416: "pt_BR", # Portuguese - Brazil
  1491. 0x0816: "pt_PT", # Portuguese - Portugal
  1492. 0x0446: "pa_IN", # Punjabi
  1493. 0x046b: "quz_BO",# Quechua (Bolivia)
  1494. 0x086b: "quz_EC",# Quechua (Ecuador)
  1495. 0x0c6b: "quz_PE",# Quechua (Peru)
  1496. 0x0418: "ro_RO", # Romanian - Romania
  1497. 0x0417: "rm_CH", # Romansh
  1498. 0x0419: "ru_RU", # Russian
  1499. 0x243b: "smn_FI",# Sami Finland
  1500. 0x103b: "smj_NO",# Sami Norway
  1501. 0x143b: "smj_SE",# Sami Sweden
  1502. 0x043b: "se_NO", # Sami Northern Norway
  1503. 0x083b: "se_SE", # Sami Northern Sweden
  1504. 0x0c3b: "se_FI", # Sami Northern Finland
  1505. 0x203b: "sms_FI",# Sami Skolt
  1506. 0x183b: "sma_NO",# Sami Southern Norway
  1507. 0x1c3b: "sma_SE",# Sami Southern Sweden
  1508. 0x044f: "sa_IN", # Sanskrit
  1509. 0x0c1a: "sr_SP", # Serbian - Cyrillic
  1510. 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
  1511. 0x081a: "sr_SP", # Serbian - Latin
  1512. 0x181a: "sr_BA", # Serbian - Bosnia Latin
  1513. 0x045b: "si_LK", # Sinhala - Sri Lanka
  1514. 0x046c: "ns_ZA", # Northern Sotho
  1515. 0x0432: "tn_ZA", # Setswana - Southern Africa
  1516. 0x041b: "sk_SK", # Slovak
  1517. 0x0424: "sl_SI", # Slovenian
  1518. 0x040a: "es_ES", # Spanish - Spain
  1519. 0x080a: "es_MX", # Spanish - Mexico
  1520. 0x0c0a: "es_ES", # Spanish - Spain (Modern)
  1521. 0x100a: "es_GT", # Spanish - Guatemala
  1522. 0x140a: "es_CR", # Spanish - Costa Rica
  1523. 0x180a: "es_PA", # Spanish - Panama
  1524. 0x1c0a: "es_DO", # Spanish - Dominican Republic
  1525. 0x200a: "es_VE", # Spanish - Venezuela
  1526. 0x240a: "es_CO", # Spanish - Colombia
  1527. 0x280a: "es_PE", # Spanish - Peru
  1528. 0x2c0a: "es_AR", # Spanish - Argentina
  1529. 0x300a: "es_EC", # Spanish - Ecuador
  1530. 0x340a: "es_CL", # Spanish - Chile
  1531. 0x380a: "es_UR", # Spanish - Uruguay
  1532. 0x3c0a: "es_PY", # Spanish - Paraguay
  1533. 0x400a: "es_BO", # Spanish - Bolivia
  1534. 0x440a: "es_SV", # Spanish - El Salvador
  1535. 0x480a: "es_HN", # Spanish - Honduras
  1536. 0x4c0a: "es_NI", # Spanish - Nicaragua
  1537. 0x500a: "es_PR", # Spanish - Puerto Rico
  1538. 0x540a: "es_US", # Spanish - United States
  1539. # 0x0430: "", # Sutu - Not supported
  1540. 0x0441: "sw_KE", # Swahili
  1541. 0x041d: "sv_SE", # Swedish - Sweden
  1542. 0x081d: "sv_FI", # Swedish - Finland
  1543. 0x045a: "syr_SY",# Syriac
  1544. 0x0428: "tg_TJ", # Tajik - Cyrillic
  1545. 0x085f: "tmz_DZ",# Tamazight - Latin
  1546. 0x0449: "ta_IN", # Tamil
  1547. 0x0444: "tt_RU", # Tatar
  1548. 0x044a: "te_IN", # Telugu
  1549. 0x041e: "th_TH", # Thai
  1550. 0x0851: "bo_BT", # Tibetan - Bhutan
  1551. 0x0451: "bo_CN", # Tibetan - PRC
  1552. 0x041f: "tr_TR", # Turkish
  1553. 0x0442: "tk_TM", # Turkmen - Cyrillic
  1554. 0x0480: "ug_CN", # Uighur - Arabic
  1555. 0x0422: "uk_UA", # Ukrainian
  1556. 0x042e: "wen_DE",# Upper Sorbian - Germany
  1557. 0x0420: "ur_PK", # Urdu
  1558. 0x0820: "ur_IN", # Urdu - India
  1559. 0x0443: "uz_UZ", # Uzbek - Latin
  1560. 0x0843: "uz_UZ", # Uzbek - Cyrillic
  1561. 0x042a: "vi_VN", # Vietnamese
  1562. 0x0452: "cy_GB", # Welsh
  1563. 0x0488: "wo_SN", # Wolof - Senegal
  1564. 0x0434: "xh_ZA", # Xhosa - South Africa
  1565. 0x0485: "sah_RU",# Yakut - Cyrillic
  1566. 0x0478: "ii_CN", # Yi - PRC
  1567. 0x046a: "yo_NG", # Yoruba - Nigeria
  1568. 0x0435: "zu_ZA", # Zulu
  1569. }
  def _print_locale():
      """Print a report of the current locale settings (test function).

      Writes four sections to standard output, in this order:

      1. the (language, encoding) pair returned by getdefaultlocale();
      2. the getlocale() result for each category as found on entry;
      3. the same per-category report after calling resetlocale();
      4. the same per-category report after calling setlocale(LC_ALL, ""),
         or a short NOTE if that call raises.

      The categories reported are all module globals whose name starts
      with 'LC_', except LC_ALL.  Missing values are shown as
      '(undefined)'.  Returns None.
      """
      # LC_ALL is left out of the per-category report; it is only used
      # further down as the target of setlocale().
      categories = {
          key: value
          for key, value in globals().items()
          if key.startswith('LC_') and key != 'LC_ALL'
      }
      rule = '-' * 72

      def _report_categories():
          # One stanza per category: its name, then the language and
          # encoding that getlocale() reports for it, then a blank line.
          for name, category in categories.items():
              print(name, '...')
              lang, enc = getlocale(category)
              print(' Language: ', lang or '(undefined)')
              print(' Encoding: ', enc or '(undefined)')
              print()

      print('Locale defaults as determined by getdefaultlocale():')
      print(rule)
      lang, enc = getdefaultlocale()
      print('Language: ', lang or '(undefined)')
      print('Encoding: ', enc or '(undefined)')
      print()

      print('Locale settings on startup:')
      print(rule)
      _report_categories()

      print()
      print('Locale settings after calling resetlocale():')
      print(rule)
      resetlocale()
      _report_categories()

      try:
          setlocale(LC_ALL, "")
      except Exception:
          # Catch Exception rather than using a bare except so that
          # KeyboardInterrupt and SystemExit still propagate; any ordinary
          # failure of setlocale() is reported as a note instead of a crash.
          print('NOTE:')
          print('setlocale(LC_ALL, "") does not support the default locale')
          print('given in the OS environment variables.')
      else:
          print()
          print('Locale settings after calling setlocale(LC_ALL, ""):')
          print(rule)
          _report_categories()
  1620. ###
  # Add LC_MESSAGES to the public API list only when this module actually
  # has a global of that name; otherwise leave __all__ untouched.
  if "LC_MESSAGES" in globals():
      __all__.append("LC_MESSAGES")
  if __name__ == '__main__':
      # Script entry point: print the locale report, then run the number
      # formatting checks.  Each heading is followed by one blank line, and
      # one blank line separates the two sections.
      print('Locale aliasing:', end='\n\n')
      _print_locale()
      print('', 'Number formatting:', '', sep='\n')
      _test()