quopri.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
#! /usr/bin/env python3

"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""

# (Dec 1991 version).

__all__ = ["encode", "decode", "encodestring", "decodestring"]

# Byte that introduces an escape sequence; quote() prefixes every quoted
# byte with it and decode() looks for it.
ESCAPE = b'='
# Encoded lines longer than this are split with soft line breaks by encode().
MAXLINESIZE = 76
# Digit table indexed by nibble value; quote() uses it to build the two
# hexadecimal characters of an escape.
HEX = b'0123456789ABCDEF'
# Joiner used by encode() to assemble the encoded pieces of a line.
EMPTYSTRING = b''

# Use the binascii codec functions when they can be imported.  When they
# cannot, both names are set to None, which the functions in this module
# test for before falling back to their pure-Python code paths.
try:
    from binascii import a2b_qp, b2a_qp
except ImportError:
    a2b_qp = None
    b2a_qp = None
  14. def needsquoting(c, quotetabs, header):
  15. """Decide whether a particular byte ordinal needs to be quoted.
  16. The 'quotetabs' flag indicates whether embedded tabs and spaces should be
  17. quoted. Note that line-ending tabs and spaces are always encoded, as per
  18. RFC 1521.
  19. """
  20. assert isinstance(c, bytes)
  21. if c in b' \t':
  22. return quotetabs
  23. # if header, we have to escape _ because _ is used to escape space
  24. if c == b'_':
  25. return header
  26. return c == ESCAPE or not (b' ' <= c <= b'~')
  27. def quote(c):
  28. """Quote a single character."""
  29. assert isinstance(c, bytes) and len(c)==1
  30. c = ord(c)
  31. return ESCAPE + bytes((HEX[c//16], HEX[c%16]))
  32. def encode(input, output, quotetabs, header=False):
  33. """Read 'input', apply quoted-printable encoding, and write to 'output'.
  34. 'input' and 'output' are binary file objects. The 'quotetabs' flag
  35. indicates whether embedded tabs and spaces should be quoted. Note that
  36. line-ending tabs and spaces are always encoded, as per RFC 1521.
  37. The 'header' flag indicates whether we are encoding spaces as _ as per RFC
  38. 1522."""
  39. if b2a_qp is not None:
  40. data = input.read()
  41. odata = b2a_qp(data, quotetabs=quotetabs, header=header)
  42. output.write(odata)
  43. return
  44. def write(s, output=output, lineEnd=b'\n'):
  45. # RFC 1521 requires that the line ending in a space or tab must have
  46. # that trailing character encoded.
  47. if s and s[-1:] in b' \t':
  48. output.write(s[:-1] + quote(s[-1:]) + lineEnd)
  49. elif s == b'.':
  50. output.write(quote(s) + lineEnd)
  51. else:
  52. output.write(s + lineEnd)
  53. prevline = None
  54. while line := input.readline():
  55. outline = []
  56. # Strip off any readline induced trailing newline
  57. stripped = b''
  58. if line[-1:] == b'\n':
  59. line = line[:-1]
  60. stripped = b'\n'
  61. # Calculate the un-length-limited encoded line
  62. for c in line:
  63. c = bytes((c,))
  64. if needsquoting(c, quotetabs, header):
  65. c = quote(c)
  66. if header and c == b' ':
  67. outline.append(b'_')
  68. else:
  69. outline.append(c)
  70. # First, write out the previous line
  71. if prevline is not None:
  72. write(prevline)
  73. # Now see if we need any soft line breaks because of RFC-imposed
  74. # length limitations. Then do the thisline->prevline dance.
  75. thisline = EMPTYSTRING.join(outline)
  76. while len(thisline) > MAXLINESIZE:
  77. # Don't forget to include the soft line break `=' sign in the
  78. # length calculation!
  79. write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
  80. thisline = thisline[MAXLINESIZE-1:]
  81. # Write out the current line
  82. prevline = thisline
  83. # Write out the last line, without a trailing newline
  84. if prevline is not None:
  85. write(prevline, lineEnd=stripped)
  86. def encodestring(s, quotetabs=False, header=False):
  87. if b2a_qp is not None:
  88. return b2a_qp(s, quotetabs=quotetabs, header=header)
  89. from io import BytesIO
  90. infp = BytesIO(s)
  91. outfp = BytesIO()
  92. encode(infp, outfp, quotetabs, header)
  93. return outfp.getvalue()
  94. def decode(input, output, header=False):
  95. """Read 'input', apply quoted-printable decoding, and write to 'output'.
  96. 'input' and 'output' are binary file objects.
  97. If 'header' is true, decode underscore as space (per RFC 1522)."""
  98. if a2b_qp is not None:
  99. data = input.read()
  100. odata = a2b_qp(data, header=header)
  101. output.write(odata)
  102. return
  103. new = b''
  104. while line := input.readline():
  105. i, n = 0, len(line)
  106. if n > 0 and line[n-1:n] == b'\n':
  107. partial = 0; n = n-1
  108. # Strip trailing whitespace
  109. while n > 0 and line[n-1:n] in b" \t\r":
  110. n = n-1
  111. else:
  112. partial = 1
  113. while i < n:
  114. c = line[i:i+1]
  115. if c == b'_' and header:
  116. new = new + b' '; i = i+1
  117. elif c != ESCAPE:
  118. new = new + c; i = i+1
  119. elif i+1 == n and not partial:
  120. partial = 1; break
  121. elif i+1 < n and line[i+1:i+2] == ESCAPE:
  122. new = new + ESCAPE; i = i+2
  123. elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
  124. new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
  125. else: # Bad escape sequence -- leave it in
  126. new = new + c; i = i+1
  127. if not partial:
  128. output.write(new + b'\n')
  129. new = b''
  130. if new:
  131. output.write(new)
  132. def decodestring(s, header=False):
  133. if a2b_qp is not None:
  134. return a2b_qp(s, header=header)
  135. from io import BytesIO
  136. infp = BytesIO(s)
  137. outfp = BytesIO()
  138. decode(infp, outfp, header=header)
  139. return outfp.getvalue()
  140. # Other helper functions
  141. def ishex(c):
  142. """Return true if the byte ordinal 'c' is a hexadecimal digit in ASCII."""
  143. assert isinstance(c, bytes)
  144. return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
  145. def unhex(s):
  146. """Get the integer value of a hexadecimal number."""
  147. bits = 0
  148. for c in s:
  149. c = bytes((c,))
  150. if b'0' <= c <= b'9':
  151. i = ord('0')
  152. elif b'a' <= c <= b'f':
  153. i = ord('a')-10
  154. elif b'A' <= c <= b'F':
  155. i = ord(b'A')-10
  156. else:
  157. assert False, "non-hex digit "+repr(c)
  158. bits = bits*16 + (ord(c) - i)
  159. return bits
  160. def main():
  161. import sys
  162. import getopt
  163. try:
  164. opts, args = getopt.getopt(sys.argv[1:], 'td')
  165. except getopt.error as msg:
  166. sys.stdout = sys.stderr
  167. print(msg)
  168. print("usage: quopri [-t | -d] [file] ...")
  169. print("-t: quote tabs")
  170. print("-d: decode; default encode")
  171. sys.exit(2)
  172. deco = False
  173. tabs = False
  174. for o, a in opts:
  175. if o == '-t': tabs = True
  176. if o == '-d': deco = True
  177. if tabs and deco:
  178. sys.stdout = sys.stderr
  179. print("-t and -d are mutually exclusive")
  180. sys.exit(2)
  181. if not args: args = ['-']
  182. sts = 0
  183. for file in args:
  184. if file == '-':
  185. fp = sys.stdin.buffer
  186. else:
  187. try:
  188. fp = open(file, "rb")
  189. except OSError as msg:
  190. sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
  191. sts = 1
  192. continue
  193. try:
  194. if deco:
  195. decode(fp, sys.stdout.buffer)
  196. else:
  197. encode(fp, sys.stdout.buffer, tabs)
  198. finally:
  199. if file != '-':
  200. fp.close()
  201. if sts:
  202. sys.exit(sts)
  203. if __name__ == '__main__':
  204. main()