# quopri.py 7.1 KB
  1. #! /usr/bin/env python3
  2. """Conversions to/from quoted-printable transport encoding as per RFC 1521."""
  3. # (Dec 1991 version).
  4. __all__ = ["encode", "decode", "encodestring", "decodestring"]
  5. ESCAPE = b'='
  6. MAXLINESIZE = 76
  7. HEX = b'0123456789ABCDEF'
  8. EMPTYSTRING = b''
  9. try:
  10. from binascii import a2b_qp, b2a_qp
  11. except ImportError:
  12. a2b_qp = None
  13. b2a_qp = None
  14. def needsquoting(c, quotetabs, header):
  15. """Decide whether a particular byte ordinal needs to be quoted.
  16. The 'quotetabs' flag indicates whether embedded tabs and spaces should be
  17. quoted. Note that line-ending tabs and spaces are always encoded, as per
  18. RFC 1521.
  19. """
  20. assert isinstance(c, bytes)
  21. if c in b' \t':
  22. return quotetabs
  23. # if header, we have to escape _ because _ is used to escape space
  24. if c == b'_':
  25. return header
  26. return c == ESCAPE or not (b' ' <= c <= b'~')
  27. def quote(c):
  28. """Quote a single character."""
  29. assert isinstance(c, bytes) and len(c)==1
  30. c = ord(c)
  31. return ESCAPE + bytes((HEX[c//16], HEX[c%16]))
  32. def encode(input, output, quotetabs, header=False):
  33. """Read 'input', apply quoted-printable encoding, and write to 'output'.
  34. 'input' and 'output' are binary file objects. The 'quotetabs' flag
  35. indicates whether embedded tabs and spaces should be quoted. Note that
  36. line-ending tabs and spaces are always encoded, as per RFC 1521.
  37. The 'header' flag indicates whether we are encoding spaces as _ as per RFC
  38. 1522."""
  39. if b2a_qp is not None:
  40. data = input.read()
  41. odata = b2a_qp(data, quotetabs=quotetabs, header=header)
  42. output.write(odata)
  43. return
  44. def write(s, output=output, lineEnd=b'\n'):
  45. # RFC 1521 requires that the line ending in a space or tab must have
  46. # that trailing character encoded.
  47. if s and s[-1:] in b' \t':
  48. output.write(s[:-1] + quote(s[-1:]) + lineEnd)
  49. elif s == b'.':
  50. output.write(quote(s) + lineEnd)
  51. else:
  52. output.write(s + lineEnd)
  53. prevline = None
  54. while 1:
  55. line = input.readline()
  56. if not line:
  57. break
  58. outline = []
  59. # Strip off any readline induced trailing newline
  60. stripped = b''
  61. if line[-1:] == b'\n':
  62. line = line[:-1]
  63. stripped = b'\n'
  64. # Calculate the un-length-limited encoded line
  65. for c in line:
  66. c = bytes((c,))
  67. if needsquoting(c, quotetabs, header):
  68. c = quote(c)
  69. if header and c == b' ':
  70. outline.append(b'_')
  71. else:
  72. outline.append(c)
  73. # First, write out the previous line
  74. if prevline is not None:
  75. write(prevline)
  76. # Now see if we need any soft line breaks because of RFC-imposed
  77. # length limitations. Then do the thisline->prevline dance.
  78. thisline = EMPTYSTRING.join(outline)
  79. while len(thisline) > MAXLINESIZE:
  80. # Don't forget to include the soft line break `=' sign in the
  81. # length calculation!
  82. write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
  83. thisline = thisline[MAXLINESIZE-1:]
  84. # Write out the current line
  85. prevline = thisline
  86. # Write out the last line, without a trailing newline
  87. if prevline is not None:
  88. write(prevline, lineEnd=stripped)
  89. def encodestring(s, quotetabs=False, header=False):
  90. if b2a_qp is not None:
  91. return b2a_qp(s, quotetabs=quotetabs, header=header)
  92. from io import BytesIO
  93. infp = BytesIO(s)
  94. outfp = BytesIO()
  95. encode(infp, outfp, quotetabs, header)
  96. return outfp.getvalue()
  97. def decode(input, output, header=False):
  98. """Read 'input', apply quoted-printable decoding, and write to 'output'.
  99. 'input' and 'output' are binary file objects.
  100. If 'header' is true, decode underscore as space (per RFC 1522)."""
  101. if a2b_qp is not None:
  102. data = input.read()
  103. odata = a2b_qp(data, header=header)
  104. output.write(odata)
  105. return
  106. new = b''
  107. while 1:
  108. line = input.readline()
  109. if not line: break
  110. i, n = 0, len(line)
  111. if n > 0 and line[n-1:n] == b'\n':
  112. partial = 0; n = n-1
  113. # Strip trailing whitespace
  114. while n > 0 and line[n-1:n] in b" \t\r":
  115. n = n-1
  116. else:
  117. partial = 1
  118. while i < n:
  119. c = line[i:i+1]
  120. if c == b'_' and header:
  121. new = new + b' '; i = i+1
  122. elif c != ESCAPE:
  123. new = new + c; i = i+1
  124. elif i+1 == n and not partial:
  125. partial = 1; break
  126. elif i+1 < n and line[i+1:i+2] == ESCAPE:
  127. new = new + ESCAPE; i = i+2
  128. elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
  129. new = new + bytes((unhex(line[i+1:i+3]),)); i = i+3
  130. else: # Bad escape sequence -- leave it in
  131. new = new + c; i = i+1
  132. if not partial:
  133. output.write(new + b'\n')
  134. new = b''
  135. if new:
  136. output.write(new)
  137. def decodestring(s, header=False):
  138. if a2b_qp is not None:
  139. return a2b_qp(s, header=header)
  140. from io import BytesIO
  141. infp = BytesIO(s)
  142. outfp = BytesIO()
  143. decode(infp, outfp, header=header)
  144. return outfp.getvalue()
  145. # Other helper functions
  146. def ishex(c):
  147. """Return true if the byte ordinal 'c' is a hexadecimal digit in ASCII."""
  148. assert isinstance(c, bytes)
  149. return b'0' <= c <= b'9' or b'a' <= c <= b'f' or b'A' <= c <= b'F'
  150. def unhex(s):
  151. """Get the integer value of a hexadecimal number."""
  152. bits = 0
  153. for c in s:
  154. c = bytes((c,))
  155. if b'0' <= c <= b'9':
  156. i = ord('0')
  157. elif b'a' <= c <= b'f':
  158. i = ord('a')-10
  159. elif b'A' <= c <= b'F':
  160. i = ord(b'A')-10
  161. else:
  162. assert False, "non-hex digit "+repr(c)
  163. bits = bits*16 + (ord(c) - i)
  164. return bits
  165. def main():
  166. import sys
  167. import getopt
  168. try:
  169. opts, args = getopt.getopt(sys.argv[1:], 'td')
  170. except getopt.error as msg:
  171. sys.stdout = sys.stderr
  172. print(msg)
  173. print("usage: quopri [-t | -d] [file] ...")
  174. print("-t: quote tabs")
  175. print("-d: decode; default encode")
  176. sys.exit(2)
  177. deco = False
  178. tabs = False
  179. for o, a in opts:
  180. if o == '-t': tabs = True
  181. if o == '-d': deco = True
  182. if tabs and deco:
  183. sys.stdout = sys.stderr
  184. print("-t and -d are mutually exclusive")
  185. sys.exit(2)
  186. if not args: args = ['-']
  187. sts = 0
  188. for file in args:
  189. if file == '-':
  190. fp = sys.stdin.buffer
  191. else:
  192. try:
  193. fp = open(file, "rb")
  194. except OSError as msg:
  195. sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
  196. sts = 1
  197. continue
  198. try:
  199. if deco:
  200. decode(fp, sys.stdout.buffer)
  201. else:
  202. encode(fp, sys.stdout.buffer, tabs)
  203. finally:
  204. if file != '-':
  205. fp.close()
  206. if sts:
  207. sys.exit(sts)
  208. if __name__ == '__main__':
  209. main()