punycode.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. /*! https://mths.be/punycode v1.4.1 by @mathias */
  /** Largest positive signed 32-bit integer value */
  var maxInt = 0x7FFFFFFF; // 2147483647, i.e. 2^31-1

  /** Bootstring parameters */
  var base = 36,
    tMin = 1,
    tMax = 26,
    skew = 38,
    damp = 700,
    initialBias = 72,
    initialN = 128, // 0x80
    delimiter = '-'; // '\x2D'

  /** Regular expressions */
  var regexPunycode = /^xn--/, // label prefix that marks a Punycoded label
    regexNonASCII = /[^\x20-\x7E]/, // anything outside printable ASCII
    regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

  /** Error messages, keyed by the type names passed to `error()` */
  var errors = {
    'overflow': 'Overflow: input needs wider integers to process',
    'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
    'invalid-input': 'Invalid input'
  };

  /** Convenience shortcuts */
  var baseMinusTMin = base - tMin,
    floor = Math.floor,
    stringFromCharCode = String.fromCharCode;
  27. /*--------------------------------------------------------------------------*/
  /**
   * Raises the error registered under `type` in the `errors` table.
   * @private
   * @param {String} type The error type (a key of `errors`).
   * @throws {RangeError} Always; the message is `errors[type]`.
   */
  function error(type) {
    var message = errors[type];
    throw new RangeError(message);
  }
  /**
   * Minimal stand-in for `Array#map`.
   * @private
   * @param {Array} array The array to iterate over.
   * @param {Function} fn Called once per item with the item as its only
   * argument. Items are visited from the last index down to the first.
   * @returns {Array} A new array holding the callback results at the same
   * indices as their inputs.
   */
  function map(array, fn) {
    var result = [];
    for (var index = array.length - 1; index >= 0; index--) {
      result[index] = fn(array[index]);
    }
    return result;
  }
  /**
   * A simple `Array#map`-like wrapper to work with domain name strings or email
   * addresses. The callback is applied to each dot-separated label of the
   * domain part and the results are re-joined with `.`.
   * @private
   * @param {String} string The domain name or email address.
   * @param {Function} fn The function that gets called for every label.
   * @returns {String} The input with every domain label replaced by the
   * callback's result; any local part (up to and including the last `@`) is
   * returned unchanged.
   */
  function mapDomain(string, fn) {
    var result = '';
    // In email addresses, only the domain name should be punycoded. The domain
    // is whatever follows the LAST `@`: splitting on every `@` and keeping
    // only the first two pieces would silently drop the rest of an input such
    // as `"a@b"@example.com`.
    var atIndex = string.lastIndexOf('@');
    if (atIndex > -1) {
      result = string.slice(0, atIndex + 1);
      string = string.slice(atIndex + 1);
    }
    // Avoid `split(regex)` for IE8 compatibility. See #17.
    string = string.replace(regexSeparators, '\x2E');
    var labels = string.split('.');
    var encoded = map(labels, fn).join('.');
    return result + encoded;
  }
  /**
   * Builds an array of numeric code points from a string. A high surrogate
   * immediately followed by a low surrogate is combined into one code point
   * (as in UTF-16); every other code unit, including an unmatched surrogate,
   * is copied through as-is.
   * @see `punycode.ucs2.encode`
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode.ucs2
   * @name decode
   * @param {String} string The Unicode input string (UCS-2).
   * @returns {Array} The new array of code points.
   */
  function ucs2decode(string) {
    var codePoints = [];
    var total = string.length;
    var pos = 0;
    while (pos < total) {
      var unit = string.charCodeAt(pos);
      pos += 1;
      var isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
      if (!isHighSurrogate || pos >= total) {
        // Ordinary code unit, or a high surrogate with nothing after it.
        codePoints.push(unit);
        continue;
      }
      var following = string.charCodeAt(pos);
      if ((following & 0xFC00) == 0xDC00) {
        // Proper surrogate pair: consume both halves.
        codePoints.push(((unit & 0x3FF) << 10) + (following & 0x3FF) + 0x10000);
        pos += 1;
      } else {
        // Unmatched high surrogate: emit it alone and leave the following
        // unit for the next pass, since it may itself start a pair.
        codePoints.push(unit);
      }
    }
    return codePoints;
  }
  /**
   * Builds a string from an array of numeric code points. Values above
   * 0xFFFF are written as a surrogate pair; all others as a single code unit.
   * @see `punycode.ucs2.decode`
   * @memberOf punycode.ucs2
   * @name encode
   * @param {Array} codePoints The array of numeric code points.
   * @returns {String} The new Unicode string (UCS-2).
   */
  function ucs2encode(array) {
    var chunks = map(array, function(codePoint) {
      if (codePoint > 0xFFFF) {
        var offset = codePoint - 0x10000;
        var high = stringFromCharCode(offset >>> 10 & 0x3FF | 0xD800);
        var low = stringFromCharCode(0xDC00 | offset & 0x3FF);
        return '' + high + low;
      }
      return '' + stringFromCharCode(codePoint);
    });
    return chunks.join('');
  }
  /**
   * Converts a basic code point into a digit/integer.
   * @see `digitToBasic()`
   * @private
   * @param {Number} codePoint The basic numeric code point value.
   * @returns {Number} The numeric value of a basic code point (for use in
   * representing integers) in the range `0` to `base - 1`, or `base` if
   * the code point does not represent a value.
   */
  function basicToDigit(codePoint) {
    // Each range is checked on BOTH ends. A one-sided test such as
    // `codePoint - 48 < 10` is also true for every code point below '0', so
    // characters like '-' or ':' would be mistaken for digits (or produce a
    // negative value) instead of yielding `base`.
    if (codePoint >= 0x30 && codePoint < 0x3A) {
      // '0'..'9' map to 26..35
      return 26 + (codePoint - 0x30);
    }
    if (codePoint >= 0x41 && codePoint < 0x5B) {
      // 'A'..'Z' map to 0..25
      return codePoint - 0x41;
    }
    if (codePoint >= 0x61 && codePoint < 0x7B) {
      // 'a'..'z' map to 0..25
      return codePoint - 0x61;
    }
    return base;
  }
  /**
   * Converts a digit/integer into a basic code point.
   * @see `basicToDigit()`
   * @private
   * @param {Number} digit The numeric value of a basic code point, expected
   * to be in the range `0` to `base - 1`.
   * @param {*} flag Selects the case: when `flag != 0` (loose comparison, so
   * an omitted flag counts as non-zero) 32 is subtracted from the result,
   * which turns a lowercase letter into its uppercase form; otherwise the
   * lowercase form is returned. The behavior is undefined if `flag` is
   * non-zero and `digit` has no uppercase form.
   * @returns {Number} The basic code point whose value (when used for
   * representing integers) is `digit`.
   */
  function digitToBasic(digit, flag) {
    // 0..25 land on ASCII a..z (22 + 75 = 97 = 'a'); 26..35 land on ASCII
    // 0..9 (26 + 22 = 48 = '0').
    var letterOffset = digit < 26 ? 75 : 0;
    var caseShift = flag != 0 ? 32 : 0;
    return digit + 22 + letterOffset - caseShift;
  }
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * https://tools.ietf.org/html/rfc3492#section-3.4
   * @private
   * @param {Number} delta The delta that was just encoded or decoded.
   * @param {Number} numPoints Divisor used to scale the delta; callers pass
   * the number of code points handled so far, including the current one.
   * @param {Boolean} firstTime Truthy for the very first delta, which is
   * divided by `damp` instead of being halved.
   * @returns {Number} The new bias.
   */
  function adapt(delta, numPoints, firstTime) {
    var threshold = baseMinusTMin * tMax >> 1;
    var k = 0;
    if (firstTime) {
      delta = floor(delta / damp);
    } else {
      delta = delta >> 1;
    }
    delta += floor(delta / numPoints);
    while (delta > threshold) {
      delta = floor(delta / baseMinusTMin);
      k += base;
    }
    return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
  }
  /**
   * Converts a Punycode string of ASCII-only symbols to a string of Unicode
   * symbols.
   * @memberOf punycode
   * @param {String} input The Punycode string of ASCII-only symbols.
   * @returns {String} The resulting string of Unicode symbols.
   * @throws {RangeError} With the 'not-basic' message if a code point >= 0x80
   * appears before the last delimiter, the 'invalid-input' message if the
   * encoded part ends mid-integer or contains a character that is not a
   * base-36 digit, and the 'overflow' message if a value would exceed
   * `maxInt`.
   */
  export function decode(input) {
    // Don't use UCS-2
    var output = [],
      inputLength = input.length,
      out,
      i = 0,
      n = initialN,
      bias = initialBias,
      basic,
      j,
      index,
      oldi,
      w,
      k,
      digit,
      t,
      /** Cached calculation results */
      baseMinusT;

    // Handle the basic code points: let `basic` be the number of input code
    // points before the last delimiter, or `0` if there is none, then copy
    // the first basic code points to the output.
    basic = input.lastIndexOf(delimiter);
    if (basic < 0) {
      basic = 0;
    }
    for (j = 0; j < basic; ++j) {
      // if it's not a basic code point
      if (input.charCodeAt(j) >= 0x80) {
        error('not-basic');
      }
      output.push(input.charCodeAt(j));
    }

    // Main decoding loop: start just after the last delimiter if any basic code
    // points were copied; start at the beginning otherwise.
    for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */ ) {
      // `index` is the index of the next character to be consumed.
      // Decode a generalized variable-length integer into `delta`,
      // which gets added to `i`. The overflow checking is easier
      // if we increase `i` as we go, then subtract off its starting
      // value at the end to obtain `delta`.
      for (oldi = i, w = 1, k = base; /* no condition */ ; k += base) {
        if (index >= inputLength) {
          error('invalid-input');
        }
        digit = basicToDigit(input.charCodeAt(index++));
        // A character that is not a digit is malformed input, not an
        // arithmetic overflow, so it gets its own check and message.
        if (digit >= base) {
          error('invalid-input');
        }
        if (digit > floor((maxInt - i) / w)) {
          error('overflow');
        }
        i += digit * w;
        t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
        if (digit < t) {
          break;
        }
        baseMinusT = base - t;
        if (w > floor(maxInt / baseMinusT)) {
          error('overflow');
        }
        w *= baseMinusT;
      }

      out = output.length + 1;
      bias = adapt(i - oldi, out, oldi == 0);

      // `i` was supposed to wrap around from `out` to `0`,
      // incrementing `n` each time, so we'll fix that now:
      if (floor(i / out) > maxInt - n) {
        error('overflow');
      }
      n += floor(i / out);
      i %= out;

      // Insert `n` at position `i` of the output
      output.splice(i++, 0, n);
    }
    return ucs2encode(output);
  }
  /**
   * Converts a string of Unicode symbols (e.g. a domain name label) to a
   * Punycode string of ASCII-only symbols.
   * @memberOf punycode
   * @param {String} input The string of Unicode symbols.
   * @returns {String} The resulting Punycode string of ASCII-only symbols.
   * @throws {RangeError} With the 'overflow' message if a delta would exceed
   * `maxInt`.
   */
  export function encode(input) {
    // Work on code points rather than on UCS-2 code units.
    var codePoints = ucs2decode(input);
    var total = codePoints.length;
    var pieces = [];
    var idx;
    var current;

    // Basic code points are copied to the output first, in input order.
    for (idx = 0; idx < total; ++idx) {
      current = codePoints[idx];
      if (current < 0x80) {
        pieces.push(stringFromCharCode(current));
      }
    }

    // `basicCount` is the number of basic code points; `handled` is the
    // number of code points that have been dealt with so far.
    var basicCount = pieces.length;
    var handled = basicCount;

    // A non-empty basic part is terminated with the delimiter.
    if (basicCount) {
      pieces.push(delimiter);
    }

    // Initial encoder state.
    var n = initialN;
    var delta = 0;
    var bias = initialBias;

    while (handled < total) {
      // Every non-basic code point below `n` is done; find the smallest
      // remaining one.
      var smallest = maxInt;
      for (idx = 0; idx < total; ++idx) {
        current = codePoints[idx];
        if (current >= n && current < smallest) {
          smallest = current;
        }
      }

      // Grow `delta` enough to move the decoder's <n,i> state to
      // <smallest,0>, guarding against overflow.
      var handledPlusOne = handled + 1;
      if (smallest - n > floor((maxInt - delta) / handledPlusOne)) {
        error('overflow');
      }
      delta += (smallest - n) * handledPlusOne;
      n = smallest;

      for (idx = 0; idx < total; ++idx) {
        current = codePoints[idx];
        if (current < n) {
          delta += 1;
          if (delta > maxInt) {
            error('overflow');
          }
        }
        if (current == n) {
          // Emit `delta` as a generalized variable-length integer.
          var q = delta;
          var k = base;
          while (true) {
            var t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
            if (q < t) {
              break;
            }
            var remainder = q - t;
            var radix = base - t;
            pieces.push(
              stringFromCharCode(digitToBasic(t + remainder % radix, 0))
            );
            q = floor(remainder / radix);
            k += base;
          }
          pieces.push(stringFromCharCode(digitToBasic(q, 0)));
          // `handledPlusOne` is intentionally the value computed before this
          // scan started, even if `handled` has grown since.
          bias = adapt(delta, handledPlusOne, handled == basicCount);
          delta = 0;
          ++handled;
        }
      }

      ++delta;
      ++n;
    }
    return pieces.join('');
  }
  /**
   * Converts a Punycode string representing a domain name or an email address
   * to Unicode. Only labels that start with `xn--` are decoded; every other
   * label is returned untouched, so calling this on already-converted input
   * is harmless.
   * @memberOf punycode
   * @param {String} input The Punycoded domain name or email address to
   * convert to Unicode.
   * @returns {String} The Unicode representation of the given Punycode
   * string.
   */
  export function toUnicode(input) {
    return mapDomain(input, function(label) {
      if (!regexPunycode.test(label)) {
        return label;
      }
      // Drop the 4-character `xn--` prefix and lowercase before decoding.
      var payload = label.slice(4).toLowerCase();
      return decode(payload);
    });
  }
  /**
   * Converts a Unicode string representing a domain name or an email address to
   * Punycode. Only labels matching `regexNonASCII` are encoded and given the
   * `xn--` prefix; every other label is returned untouched, so calling this
   * with a domain that is already ASCII is harmless.
   * @memberOf punycode
   * @param {String} input The domain name or email address to convert, as a
   * Unicode string.
   * @returns {String} The Punycode representation of the given domain name or
   * email address.
   */
  export function toASCII(input) {
    return mapDomain(input, function(label) {
      if (regexNonASCII.test(label)) {
        return 'xn--' + encode(label);
      }
      return label;
    });
  }
  /** Version string of this punycode build. */
  export var version = '1.4.1';

  /**
   * An object of methods to convert from JavaScript's internal character
   * representation (UCS-2) to Unicode code points, and back.
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode
   * @type Object
   */
  export var ucs2 = {
    decode: ucs2decode,
    encode: ucs2encode
  };

  /** Default export: the same API as the named exports, bundled in one object. */
  export default {
    version,
    ucs2,
    toASCII,
    toUnicode,
    encode,
    decode
  }