dataURL.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. const assert = require('assert')
  2. const { atob } = require('buffer')
  3. const { isomorphicDecode } = require('./util')
  4. const encoder = new TextEncoder()
  5. /**
  6. * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
  7. */
  8. const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+-.^_|~A-Za-z0-9]+$/
  9. const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line
  10. /**
  11. * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
  12. */
  13. const HTTP_QUOTED_STRING_TOKENS = /[\u0009|\u0020-\u007E|\u0080-\u00FF]/ // eslint-disable-line
  14. // https://fetch.spec.whatwg.org/#data-url-processor
  15. /** @param {URL} dataURL */
  16. function dataURLProcessor (dataURL) {
  17. // 1. Assert: dataURL’s scheme is "data".
  18. assert(dataURL.protocol === 'data:')
  19. // 2. Let input be the result of running the URL
  20. // serializer on dataURL with exclude fragment
  21. // set to true.
  22. let input = URLSerializer(dataURL, true)
  23. // 3. Remove the leading "data:" string from input.
  24. input = input.slice(5)
  25. // 4. Let position point at the start of input.
  26. const position = { position: 0 }
  27. // 5. Let mimeType be the result of collecting a
  28. // sequence of code points that are not equal
  29. // to U+002C (,), given position.
  30. let mimeType = collectASequenceOfCodePointsFast(
  31. ',',
  32. input,
  33. position
  34. )
  35. // 6. Strip leading and trailing ASCII whitespace
  36. // from mimeType.
  37. // Undici implementation note: we need to store the
  38. // length because if the mimetype has spaces removed,
  39. // the wrong amount will be sliced from the input in
  40. // step #9
  41. const mimeTypeLength = mimeType.length
  42. mimeType = removeASCIIWhitespace(mimeType, true, true)
  43. // 7. If position is past the end of input, then
  44. // return failure
  45. if (position.position >= input.length) {
  46. return 'failure'
  47. }
  48. // 8. Advance position by 1.
  49. position.position++
  50. // 9. Let encodedBody be the remainder of input.
  51. const encodedBody = input.slice(mimeTypeLength + 1)
  52. // 10. Let body be the percent-decoding of encodedBody.
  53. let body = stringPercentDecode(encodedBody)
  54. // 11. If mimeType ends with U+003B (;), followed by
  55. // zero or more U+0020 SPACE, followed by an ASCII
  56. // case-insensitive match for "base64", then:
  57. if (/;(\u0020){0,}base64$/i.test(mimeType)) {
  58. // 1. Let stringBody be the isomorphic decode of body.
  59. const stringBody = isomorphicDecode(body)
  60. // 2. Set body to the forgiving-base64 decode of
  61. // stringBody.
  62. body = forgivingBase64(stringBody)
  63. // 3. If body is failure, then return failure.
  64. if (body === 'failure') {
  65. return 'failure'
  66. }
  67. // 4. Remove the last 6 code points from mimeType.
  68. mimeType = mimeType.slice(0, -6)
  69. // 5. Remove trailing U+0020 SPACE code points from mimeType,
  70. // if any.
  71. mimeType = mimeType.replace(/(\u0020)+$/, '')
  72. // 6. Remove the last U+003B (;) code point from mimeType.
  73. mimeType = mimeType.slice(0, -1)
  74. }
  75. // 12. If mimeType starts with U+003B (;), then prepend
  76. // "text/plain" to mimeType.
  77. if (mimeType.startsWith(';')) {
  78. mimeType = 'text/plain' + mimeType
  79. }
  80. // 13. Let mimeTypeRecord be the result of parsing
  81. // mimeType.
  82. let mimeTypeRecord = parseMIMEType(mimeType)
  83. // 14. If mimeTypeRecord is failure, then set
  84. // mimeTypeRecord to text/plain;charset=US-ASCII.
  85. if (mimeTypeRecord === 'failure') {
  86. mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII')
  87. }
  88. // 15. Return a new data: URL struct whose MIME
  89. // type is mimeTypeRecord and body is body.
  90. // https://fetch.spec.whatwg.org/#data-url-struct
  91. return { mimeType: mimeTypeRecord, body }
  92. }
  93. // https://url.spec.whatwg.org/#concept-url-serializer
  94. /**
  95. * @param {URL} url
  96. * @param {boolean} excludeFragment
  97. */
  98. function URLSerializer (url, excludeFragment = false) {
  99. const href = url.href
  100. if (!excludeFragment) {
  101. return href
  102. }
  103. const hash = href.lastIndexOf('#')
  104. if (hash === -1) {
  105. return href
  106. }
  107. return href.slice(0, hash)
  108. }
  109. // https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
  110. /**
  111. * @param {(char: string) => boolean} condition
  112. * @param {string} input
  113. * @param {{ position: number }} position
  114. */
  115. function collectASequenceOfCodePoints (condition, input, position) {
  116. // 1. Let result be the empty string.
  117. let result = ''
  118. // 2. While position doesn’t point past the end of input and the
  119. // code point at position within input meets the condition condition:
  120. while (position.position < input.length && condition(input[position.position])) {
  121. // 1. Append that code point to the end of result.
  122. result += input[position.position]
  123. // 2. Advance position by 1.
  124. position.position++
  125. }
  126. // 3. Return result.
  127. return result
  128. }
  129. /**
  130. * A faster collectASequenceOfCodePoints that only works when comparing a single character.
  131. * @param {string} char
  132. * @param {string} input
  133. * @param {{ position: number }} position
  134. */
  135. function collectASequenceOfCodePointsFast (char, input, position) {
  136. const idx = input.indexOf(char, position.position)
  137. const start = position.position
  138. if (idx === -1) {
  139. position.position = input.length
  140. return input.slice(start)
  141. }
  142. position.position = idx
  143. return input.slice(start, position.position)
  144. }
  145. // https://url.spec.whatwg.org/#string-percent-decode
  146. /** @param {string} input */
  147. function stringPercentDecode (input) {
  148. // 1. Let bytes be the UTF-8 encoding of input.
  149. const bytes = encoder.encode(input)
  150. // 2. Return the percent-decoding of bytes.
  151. return percentDecode(bytes)
  152. }
  153. // https://url.spec.whatwg.org/#percent-decode
  154. /** @param {Uint8Array} input */
  155. function percentDecode (input) {
  156. // 1. Let output be an empty byte sequence.
  157. /** @type {number[]} */
  158. const output = []
  159. // 2. For each byte byte in input:
  160. for (let i = 0; i < input.length; i++) {
  161. const byte = input[i]
  162. // 1. If byte is not 0x25 (%), then append byte to output.
  163. if (byte !== 0x25) {
  164. output.push(byte)
  165. // 2. Otherwise, if byte is 0x25 (%) and the next two bytes
  166. // after byte in input are not in the ranges
  167. // 0x30 (0) to 0x39 (9), 0x41 (A) to 0x46 (F),
  168. // and 0x61 (a) to 0x66 (f), all inclusive, append byte
  169. // to output.
  170. } else if (
  171. byte === 0x25 &&
  172. !/^[0-9A-Fa-f]{2}$/i.test(String.fromCharCode(input[i + 1], input[i + 2]))
  173. ) {
  174. output.push(0x25)
  175. // 3. Otherwise:
  176. } else {
  177. // 1. Let bytePoint be the two bytes after byte in input,
  178. // decoded, and then interpreted as hexadecimal number.
  179. const nextTwoBytes = String.fromCharCode(input[i + 1], input[i + 2])
  180. const bytePoint = Number.parseInt(nextTwoBytes, 16)
  181. // 2. Append a byte whose value is bytePoint to output.
  182. output.push(bytePoint)
  183. // 3. Skip the next two bytes in input.
  184. i += 2
  185. }
  186. }
  187. // 3. Return output.
  188. return Uint8Array.from(output)
  189. }
  190. // https://mimesniff.spec.whatwg.org/#parse-a-mime-type
  191. /** @param {string} input */
  192. function parseMIMEType (input) {
  193. // 1. Remove any leading and trailing HTTP whitespace
  194. // from input.
  195. input = removeHTTPWhitespace(input, true, true)
  196. // 2. Let position be a position variable for input,
  197. // initially pointing at the start of input.
  198. const position = { position: 0 }
  199. // 3. Let type be the result of collecting a sequence
  200. // of code points that are not U+002F (/) from
  201. // input, given position.
  202. const type = collectASequenceOfCodePointsFast(
  203. '/',
  204. input,
  205. position
  206. )
  207. // 4. If type is the empty string or does not solely
  208. // contain HTTP token code points, then return failure.
  209. // https://mimesniff.spec.whatwg.org/#http-token-code-point
  210. if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) {
  211. return 'failure'
  212. }
  213. // 5. If position is past the end of input, then return
  214. // failure
  215. if (position.position > input.length) {
  216. return 'failure'
  217. }
  218. // 6. Advance position by 1. (This skips past U+002F (/).)
  219. position.position++
  220. // 7. Let subtype be the result of collecting a sequence of
  221. // code points that are not U+003B (;) from input, given
  222. // position.
  223. let subtype = collectASequenceOfCodePointsFast(
  224. ';',
  225. input,
  226. position
  227. )
  228. // 8. Remove any trailing HTTP whitespace from subtype.
  229. subtype = removeHTTPWhitespace(subtype, false, true)
  230. // 9. If subtype is the empty string or does not solely
  231. // contain HTTP token code points, then return failure.
  232. if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) {
  233. return 'failure'
  234. }
  235. const typeLowercase = type.toLowerCase()
  236. const subtypeLowercase = subtype.toLowerCase()
  237. // 10. Let mimeType be a new MIME type record whose type
  238. // is type, in ASCII lowercase, and subtype is subtype,
  239. // in ASCII lowercase.
  240. // https://mimesniff.spec.whatwg.org/#mime-type
  241. const mimeType = {
  242. type: typeLowercase,
  243. subtype: subtypeLowercase,
  244. /** @type {Map<string, string>} */
  245. parameters: new Map(),
  246. // https://mimesniff.spec.whatwg.org/#mime-type-essence
  247. essence: `${typeLowercase}/${subtypeLowercase}`
  248. }
  249. // 11. While position is not past the end of input:
  250. while (position.position < input.length) {
  251. // 1. Advance position by 1. (This skips past U+003B (;).)
  252. position.position++
  253. // 2. Collect a sequence of code points that are HTTP
  254. // whitespace from input given position.
  255. collectASequenceOfCodePoints(
  256. // https://fetch.spec.whatwg.org/#http-whitespace
  257. char => HTTP_WHITESPACE_REGEX.test(char),
  258. input,
  259. position
  260. )
  261. // 3. Let parameterName be the result of collecting a
  262. // sequence of code points that are not U+003B (;)
  263. // or U+003D (=) from input, given position.
  264. let parameterName = collectASequenceOfCodePoints(
  265. (char) => char !== ';' && char !== '=',
  266. input,
  267. position
  268. )
  269. // 4. Set parameterName to parameterName, in ASCII
  270. // lowercase.
  271. parameterName = parameterName.toLowerCase()
  272. // 5. If position is not past the end of input, then:
  273. if (position.position < input.length) {
  274. // 1. If the code point at position within input is
  275. // U+003B (;), then continue.
  276. if (input[position.position] === ';') {
  277. continue
  278. }
  279. // 2. Advance position by 1. (This skips past U+003D (=).)
  280. position.position++
  281. }
  282. // 6. If position is past the end of input, then break.
  283. if (position.position > input.length) {
  284. break
  285. }
  286. // 7. Let parameterValue be null.
  287. let parameterValue = null
  288. // 8. If the code point at position within input is
  289. // U+0022 ("), then:
  290. if (input[position.position] === '"') {
  291. // 1. Set parameterValue to the result of collecting
  292. // an HTTP quoted string from input, given position
  293. // and the extract-value flag.
  294. parameterValue = collectAnHTTPQuotedString(input, position, true)
  295. // 2. Collect a sequence of code points that are not
  296. // U+003B (;) from input, given position.
  297. collectASequenceOfCodePointsFast(
  298. ';',
  299. input,
  300. position
  301. )
  302. // 9. Otherwise:
  303. } else {
  304. // 1. Set parameterValue to the result of collecting
  305. // a sequence of code points that are not U+003B (;)
  306. // from input, given position.
  307. parameterValue = collectASequenceOfCodePointsFast(
  308. ';',
  309. input,
  310. position
  311. )
  312. // 2. Remove any trailing HTTP whitespace from parameterValue.
  313. parameterValue = removeHTTPWhitespace(parameterValue, false, true)
  314. // 3. If parameterValue is the empty string, then continue.
  315. if (parameterValue.length === 0) {
  316. continue
  317. }
  318. }
  319. // 10. If all of the following are true
  320. // - parameterName is not the empty string
  321. // - parameterName solely contains HTTP token code points
  322. // - parameterValue solely contains HTTP quoted-string token code points
  323. // - mimeType’s parameters[parameterName] does not exist
  324. // then set mimeType’s parameters[parameterName] to parameterValue.
  325. if (
  326. parameterName.length !== 0 &&
  327. HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
  328. (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
  329. !mimeType.parameters.has(parameterName)
  330. ) {
  331. mimeType.parameters.set(parameterName, parameterValue)
  332. }
  333. }
  334. // 12. Return mimeType.
  335. return mimeType
  336. }
  337. // https://infra.spec.whatwg.org/#forgiving-base64-decode
  338. /** @param {string} data */
  339. function forgivingBase64 (data) {
  340. // 1. Remove all ASCII whitespace from data.
  341. data = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '') // eslint-disable-line
  342. // 2. If data’s code point length divides by 4 leaving
  343. // no remainder, then:
  344. if (data.length % 4 === 0) {
  345. // 1. If data ends with one or two U+003D (=) code points,
  346. // then remove them from data.
  347. data = data.replace(/=?=$/, '')
  348. }
  349. // 3. If data’s code point length divides by 4 leaving
  350. // a remainder of 1, then return failure.
  351. if (data.length % 4 === 1) {
  352. return 'failure'
  353. }
  354. // 4. If data contains a code point that is not one of
  355. // U+002B (+)
  356. // U+002F (/)
  357. // ASCII alphanumeric
  358. // then return failure.
  359. if (/[^+/0-9A-Za-z]/.test(data)) {
  360. return 'failure'
  361. }
  362. const binary = atob(data)
  363. const bytes = new Uint8Array(binary.length)
  364. for (let byte = 0; byte < binary.length; byte++) {
  365. bytes[byte] = binary.charCodeAt(byte)
  366. }
  367. return bytes
  368. }
  369. // https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
  370. // tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
  371. /**
  372. * @param {string} input
  373. * @param {{ position: number }} position
  374. * @param {boolean?} extractValue
  375. */
  376. function collectAnHTTPQuotedString (input, position, extractValue) {
  377. // 1. Let positionStart be position.
  378. const positionStart = position.position
  379. // 2. Let value be the empty string.
  380. let value = ''
  381. // 3. Assert: the code point at position within input
  382. // is U+0022 (").
  383. assert(input[position.position] === '"')
  384. // 4. Advance position by 1.
  385. position.position++
  386. // 5. While true:
  387. while (true) {
  388. // 1. Append the result of collecting a sequence of code points
  389. // that are not U+0022 (") or U+005C (\) from input, given
  390. // position, to value.
  391. value += collectASequenceOfCodePoints(
  392. (char) => char !== '"' && char !== '\\',
  393. input,
  394. position
  395. )
  396. // 2. If position is past the end of input, then break.
  397. if (position.position >= input.length) {
  398. break
  399. }
  400. // 3. Let quoteOrBackslash be the code point at position within
  401. // input.
  402. const quoteOrBackslash = input[position.position]
  403. // 4. Advance position by 1.
  404. position.position++
  405. // 5. If quoteOrBackslash is U+005C (\), then:
  406. if (quoteOrBackslash === '\\') {
  407. // 1. If position is past the end of input, then append
  408. // U+005C (\) to value and break.
  409. if (position.position >= input.length) {
  410. value += '\\'
  411. break
  412. }
  413. // 2. Append the code point at position within input to value.
  414. value += input[position.position]
  415. // 3. Advance position by 1.
  416. position.position++
  417. // 6. Otherwise:
  418. } else {
  419. // 1. Assert: quoteOrBackslash is U+0022 (").
  420. assert(quoteOrBackslash === '"')
  421. // 2. Break.
  422. break
  423. }
  424. }
  425. // 6. If the extract-value flag is set, then return value.
  426. if (extractValue) {
  427. return value
  428. }
  429. // 7. Return the code points from positionStart to position,
  430. // inclusive, within input.
  431. return input.slice(positionStart, position.position)
  432. }
  433. /**
  434. * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
  435. */
  436. function serializeAMimeType (mimeType) {
  437. assert(mimeType !== 'failure')
  438. const { parameters, essence } = mimeType
  439. // 1. Let serialization be the concatenation of mimeType’s
  440. // type, U+002F (/), and mimeType’s subtype.
  441. let serialization = essence
  442. // 2. For each name → value of mimeType’s parameters:
  443. for (let [name, value] of parameters.entries()) {
  444. // 1. Append U+003B (;) to serialization.
  445. serialization += ';'
  446. // 2. Append name to serialization.
  447. serialization += name
  448. // 3. Append U+003D (=) to serialization.
  449. serialization += '='
  450. // 4. If value does not solely contain HTTP token code
  451. // points or value is the empty string, then:
  452. if (!HTTP_TOKEN_CODEPOINTS.test(value)) {
  453. // 1. Precede each occurence of U+0022 (") or
  454. // U+005C (\) in value with U+005C (\).
  455. value = value.replace(/(\\|")/g, '\\$1')
  456. // 2. Prepend U+0022 (") to value.
  457. value = '"' + value
  458. // 3. Append U+0022 (") to value.
  459. value += '"'
  460. }
  461. // 5. Append value to serialization.
  462. serialization += value
  463. }
  464. // 3. Return serialization.
  465. return serialization
  466. }
  467. /**
  468. * @see https://fetch.spec.whatwg.org/#http-whitespace
  469. * @param {string} char
  470. */
  471. function isHTTPWhiteSpace (char) {
  472. return char === '\r' || char === '\n' || char === '\t' || char === ' '
  473. }
  474. /**
  475. * @see https://fetch.spec.whatwg.org/#http-whitespace
  476. * @param {string} str
  477. */
  478. function removeHTTPWhitespace (str, leading = true, trailing = true) {
  479. let lead = 0
  480. let trail = str.length - 1
  481. if (leading) {
  482. for (; lead < str.length && isHTTPWhiteSpace(str[lead]); lead++);
  483. }
  484. if (trailing) {
  485. for (; trail > 0 && isHTTPWhiteSpace(str[trail]); trail--);
  486. }
  487. return str.slice(lead, trail + 1)
  488. }
  489. /**
  490. * @see https://infra.spec.whatwg.org/#ascii-whitespace
  491. * @param {string} char
  492. */
  493. function isASCIIWhitespace (char) {
  494. return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' '
  495. }
  496. /**
  497. * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
  498. */
  499. function removeASCIIWhitespace (str, leading = true, trailing = true) {
  500. let lead = 0
  501. let trail = str.length - 1
  502. if (leading) {
  503. for (; lead < str.length && isASCIIWhitespace(str[lead]); lead++);
  504. }
  505. if (trailing) {
  506. for (; trail > 0 && isASCIIWhitespace(str[trail]); trail--);
  507. }
  508. return str.slice(lead, trail + 1)
  509. }
// Public surface of this module: the data: URL processor together with the
// URL, code-point-collection, percent-decoding and MIME type helpers listed
// below. The remaining functions in this file are not exported.
module.exports = {
  dataURLProcessor,
  URLSerializer,
  collectASequenceOfCodePoints,
  collectASequenceOfCodePointsFast,
  stringPercentDecode,
  parseMIMEType,
  collectAnHTTPQuotedString,
  serializeAMimeType
}