encoding.js 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. 'use strict'
  /**
   * Tells whether a UTF-16 code unit is ASCII whitespace as defined by the
   * Infra Standard: U+0009 TAB, U+000A LF, U+000C FF, U+000D CR or U+0020 SPACE.
   *
   * @param {number} code
   * @returns {boolean}
   */
  function isAsciiWhitespace (code) {
    return code === 0x09 || code === 0x0a || code === 0x0c || code === 0x0d || code === 0x20
  }

  /**
   * Resolves an encoding label to the name of the encoding it designates.
   *
   * Matching ignores leading/trailing ASCII whitespace and ASCII letter case
   * only; every other character must match a known label exactly.
   *
   * @see https://encoding.spec.whatwg.org/#concept-encoding-get
   * @param {string|undefined} label
   * @returns {string} the encoding name, or the string 'failure' when the
   *   label is missing, empty or not a known label.
   */
  function getEncoding (label) {
    if (!label) {
      return 'failure'
    }

    // 1. Remove any leading and trailing ASCII whitespace from label.
    //    String.prototype.trim() is deliberately not used: it also strips
    //    non-ASCII whitespace (U+000B, U+00A0, U+FEFF, U+2028, ...), which
    //    would make invalid labels match.
    let start = 0
    let end = label.length
    while (start < end && isAsciiWhitespace(label.charCodeAt(start))) {
      start++
    }
    while (end > start && isAsciiWhitespace(label.charCodeAt(end - 1))) {
      end--
    }

    // 2. If label is an ASCII case-insensitive match for any of the
    //    labels listed in the table below, then return the
    //    corresponding encoding; otherwise return failure.
    //    Only A-Z are folded: a full Unicode toLowerCase() maps characters
    //    such as U+212A KELVIN SIGN onto ASCII letters.
    const normalized = label
      .slice(start, end)
      .replace(/[A-Z]+/g, (upper) => upper.toLowerCase())

    switch (normalized) {
      case 'unicode-1-1-utf-8':
      case 'unicode11utf8':
      case 'unicode20utf8':
      case 'utf-8':
      case 'utf8':
      case 'x-unicode20utf8':
        return 'UTF-8'
      case '866':
      case 'cp866':
      case 'csibm866':
      case 'ibm866':
        return 'IBM866'
      case 'csisolatin2':
      case 'iso-8859-2':
      case 'iso-ir-101':
      case 'iso8859-2':
      case 'iso88592':
      case 'iso_8859-2':
      case 'iso_8859-2:1987':
      case 'l2':
      case 'latin2':
        return 'ISO-8859-2'
      case 'csisolatin3':
      case 'iso-8859-3':
      case 'iso-ir-109':
      case 'iso8859-3':
      case 'iso88593':
      case 'iso_8859-3':
      case 'iso_8859-3:1988':
      case 'l3':
      case 'latin3':
        return 'ISO-8859-3'
      case 'csisolatin4':
      case 'iso-8859-4':
      case 'iso-ir-110':
      case 'iso8859-4':
      case 'iso88594':
      case 'iso_8859-4':
      case 'iso_8859-4:1988':
      case 'l4':
      case 'latin4':
        return 'ISO-8859-4'
      case 'csisolatincyrillic':
      case 'cyrillic':
      case 'iso-8859-5':
      case 'iso-ir-144':
      case 'iso8859-5':
      case 'iso88595':
      case 'iso_8859-5':
      case 'iso_8859-5:1988':
        return 'ISO-8859-5'
      case 'arabic':
      case 'asmo-708':
      case 'csiso88596e':
      case 'csiso88596i':
      case 'csisolatinarabic':
      case 'ecma-114':
      case 'iso-8859-6':
      case 'iso-8859-6-e':
      case 'iso-8859-6-i':
      case 'iso-ir-127':
      case 'iso8859-6':
      case 'iso88596':
      case 'iso_8859-6':
      case 'iso_8859-6:1987':
        return 'ISO-8859-6'
      case 'csisolatingreek':
      case 'ecma-118':
      case 'elot_928':
      case 'greek':
      case 'greek8':
      case 'iso-8859-7':
      case 'iso-ir-126':
      case 'iso8859-7':
      case 'iso88597':
      case 'iso_8859-7':
      case 'iso_8859-7:1987':
      case 'sun_eu_greek':
        return 'ISO-8859-7'
      case 'csiso88598e':
      case 'csisolatinhebrew':
      case 'hebrew':
      case 'iso-8859-8':
      case 'iso-8859-8-e':
      case 'iso-ir-138':
      case 'iso8859-8':
      case 'iso88598':
      case 'iso_8859-8':
      case 'iso_8859-8:1988':
      case 'visual':
        return 'ISO-8859-8'
      case 'csiso88598i':
      case 'iso-8859-8-i':
      case 'logical':
        return 'ISO-8859-8-I'
      case 'csisolatin6':
      case 'iso-8859-10':
      case 'iso-ir-157':
      case 'iso8859-10':
      case 'iso885910':
      case 'l6':
      case 'latin6':
        return 'ISO-8859-10'
      case 'iso-8859-13':
      case 'iso8859-13':
      case 'iso885913':
        return 'ISO-8859-13'
      case 'iso-8859-14':
      case 'iso8859-14':
      case 'iso885914':
        return 'ISO-8859-14'
      case 'csisolatin9':
      case 'iso-8859-15':
      case 'iso8859-15':
      case 'iso885915':
      case 'iso_8859-15':
      case 'l9':
        return 'ISO-8859-15'
      case 'iso-8859-16':
        return 'ISO-8859-16'
      case 'cskoi8r':
      case 'koi':
      case 'koi8':
      case 'koi8-r':
      case 'koi8_r':
        return 'KOI8-R'
      case 'koi8-ru':
      case 'koi8-u':
        return 'KOI8-U'
      case 'csmacintosh':
      case 'mac':
      case 'macintosh':
      case 'x-mac-roman':
        return 'macintosh'
      case 'dos-874':
      case 'iso-8859-11':
      case 'iso8859-11':
      case 'iso885911':
      case 'tis-620':
      case 'windows-874':
        return 'windows-874'
      case 'cp1250':
      case 'windows-1250':
      case 'x-cp1250':
        return 'windows-1250'
      case 'cp1251':
      case 'windows-1251':
      case 'x-cp1251':
        return 'windows-1251'
      case 'ansi_x3.4-1968':
      case 'ascii':
      case 'cp1252':
      case 'cp819':
      case 'csisolatin1':
      case 'ibm819':
      case 'iso-8859-1':
      case 'iso-ir-100':
      case 'iso8859-1':
      case 'iso88591':
      case 'iso_8859-1':
      case 'iso_8859-1:1987':
      case 'l1':
      case 'latin1':
      case 'us-ascii':
      case 'windows-1252':
      case 'x-cp1252':
        return 'windows-1252'
      case 'cp1253':
      case 'windows-1253':
      case 'x-cp1253':
        return 'windows-1253'
      case 'cp1254':
      case 'csisolatin5':
      case 'iso-8859-9':
      case 'iso-ir-148':
      case 'iso8859-9':
      case 'iso88599':
      case 'iso_8859-9':
      case 'iso_8859-9:1989':
      case 'l5':
      case 'latin5':
      case 'windows-1254':
      case 'x-cp1254':
        return 'windows-1254'
      case 'cp1255':
      case 'windows-1255':
      case 'x-cp1255':
        return 'windows-1255'
      case 'cp1256':
      case 'windows-1256':
      case 'x-cp1256':
        return 'windows-1256'
      case 'cp1257':
      case 'windows-1257':
      case 'x-cp1257':
        return 'windows-1257'
      case 'cp1258':
      case 'windows-1258':
      case 'x-cp1258':
        return 'windows-1258'
      case 'x-mac-cyrillic':
      case 'x-mac-ukrainian':
        return 'x-mac-cyrillic'
      case 'chinese':
      case 'csgb2312':
      case 'csiso58gb231280':
      case 'gb2312':
      case 'gb_2312':
      case 'gb_2312-80':
      case 'gbk':
      case 'iso-ir-58':
      case 'x-gbk':
        return 'GBK'
      case 'gb18030':
        return 'gb18030'
      case 'big5':
      case 'big5-hkscs':
      case 'cn-big5':
      case 'csbig5':
      case 'x-x-big5':
        return 'Big5'
      case 'cseucpkdfmtjapanese':
      case 'euc-jp':
      case 'x-euc-jp':
        return 'EUC-JP'
      case 'csiso2022jp':
      case 'iso-2022-jp':
        return 'ISO-2022-JP'
      case 'csshiftjis':
      case 'ms932':
      case 'ms_kanji':
      case 'shift-jis':
      case 'shift_jis':
      case 'sjis':
      case 'windows-31j':
      case 'x-sjis':
        return 'Shift_JIS'
      case 'cseuckr':
      case 'csksc56011987':
      case 'euc-kr':
      case 'iso-ir-149':
      case 'korean':
      case 'ks_c_5601-1987':
      case 'ks_c_5601-1989':
      case 'ksc5601':
      case 'ksc_5601':
      case 'windows-949':
        return 'EUC-KR'
      // Legacy labels that are deliberately mapped to the replacement encoding.
      case 'csiso2022kr':
      case 'hz-gb-2312':
      case 'iso-2022-cn':
      case 'iso-2022-cn-ext':
      case 'iso-2022-kr':
      case 'replacement':
        return 'replacement'
      case 'unicodefffe':
      case 'utf-16be':
        return 'UTF-16BE'
      case 'csunicode':
      case 'iso-10646-ucs-2':
      case 'ucs-2':
      case 'unicode':
      case 'unicodefeff':
      case 'utf-16':
      case 'utf-16le':
        return 'UTF-16LE'
      case 'x-user-defined':
        return 'x-user-defined'
      default:
        return 'failure'
    }
  }
  // Public surface of this module: only the label resolver is exported.
  module.exports = { getEncoding }