# utf_16.py
""" Python 'utf-16' Codec

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
  5. import codecs, sys
  6. ### Codec APIs
  7. encode = codecs.utf_16_encode
  8. def decode(input, errors='strict'):
  9. return codecs.utf_16_decode(input, errors, True)
  10. class IncrementalEncoder(codecs.IncrementalEncoder):
  11. def __init__(self, errors='strict'):
  12. codecs.IncrementalEncoder.__init__(self, errors)
  13. self.encoder = None
  14. def encode(self, input, final=False):
  15. if self.encoder is None:
  16. result = codecs.utf_16_encode(input, self.errors)[0]
  17. if sys.byteorder == 'little':
  18. self.encoder = codecs.utf_16_le_encode
  19. else:
  20. self.encoder = codecs.utf_16_be_encode
  21. return result
  22. return self.encoder(input, self.errors)[0]
  23. def reset(self):
  24. codecs.IncrementalEncoder.reset(self)
  25. self.encoder = None
  26. def getstate(self):
  27. # state info we return to the caller:
  28. # 0: stream is in natural order for this platform
  29. # 2: endianness hasn't been determined yet
  30. # (we're never writing in unnatural order)
  31. return (2 if self.encoder is None else 0)
  32. def setstate(self, state):
  33. if state:
  34. self.encoder = None
  35. else:
  36. if sys.byteorder == 'little':
  37. self.encoder = codecs.utf_16_le_encode
  38. else:
  39. self.encoder = codecs.utf_16_be_encode
  40. class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
  41. def __init__(self, errors='strict'):
  42. codecs.BufferedIncrementalDecoder.__init__(self, errors)
  43. self.decoder = None
  44. def _buffer_decode(self, input, errors, final):
  45. if self.decoder is None:
  46. (output, consumed, byteorder) = \
  47. codecs.utf_16_ex_decode(input, errors, 0, final)
  48. if byteorder == -1:
  49. self.decoder = codecs.utf_16_le_decode
  50. elif byteorder == 1:
  51. self.decoder = codecs.utf_16_be_decode
  52. elif consumed >= 2:
  53. raise UnicodeError("UTF-16 stream does not start with BOM")
  54. return (output, consumed)
  55. return self.decoder(input, self.errors, final)
  56. def reset(self):
  57. codecs.BufferedIncrementalDecoder.reset(self)
  58. self.decoder = None
  59. def getstate(self):
  60. # additional state info from the base class must be None here,
  61. # as it isn't passed along to the caller
  62. state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
  63. # additional state info we pass to the caller:
  64. # 0: stream is in natural order for this platform
  65. # 1: stream is in unnatural order
  66. # 2: endianness hasn't been determined yet
  67. if self.decoder is None:
  68. return (state, 2)
  69. addstate = int((sys.byteorder == "big") !=
  70. (self.decoder is codecs.utf_16_be_decode))
  71. return (state, addstate)
  72. def setstate(self, state):
  73. # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
  74. codecs.BufferedIncrementalDecoder.setstate(self, state)
  75. state = state[1]
  76. if state == 0:
  77. self.decoder = (codecs.utf_16_be_decode
  78. if sys.byteorder == "big"
  79. else codecs.utf_16_le_decode)
  80. elif state == 1:
  81. self.decoder = (codecs.utf_16_le_decode
  82. if sys.byteorder == "big"
  83. else codecs.utf_16_be_decode)
  84. else:
  85. self.decoder = None
  86. class StreamWriter(codecs.StreamWriter):
  87. def __init__(self, stream, errors='strict'):
  88. codecs.StreamWriter.__init__(self, stream, errors)
  89. self.encoder = None
  90. def reset(self):
  91. codecs.StreamWriter.reset(self)
  92. self.encoder = None
  93. def encode(self, input, errors='strict'):
  94. if self.encoder is None:
  95. result = codecs.utf_16_encode(input, errors)
  96. if sys.byteorder == 'little':
  97. self.encoder = codecs.utf_16_le_encode
  98. else:
  99. self.encoder = codecs.utf_16_be_encode
  100. return result
  101. else:
  102. return self.encoder(input, errors)
  103. class StreamReader(codecs.StreamReader):
  104. def reset(self):
  105. codecs.StreamReader.reset(self)
  106. try:
  107. del self.decode
  108. except AttributeError:
  109. pass
  110. def decode(self, input, errors='strict'):
  111. (object, consumed, byteorder) = \
  112. codecs.utf_16_ex_decode(input, errors, 0, False)
  113. if byteorder == -1:
  114. self.decode = codecs.utf_16_le_decode
  115. elif byteorder == 1:
  116. self.decode = codecs.utf_16_be_decode
  117. elif consumed>=2:
  118. raise UnicodeError("UTF-16 stream does not start with BOM")
  119. return (object, consumed)
  120. ### encodings module API
  121. def getregentry():
  122. return codecs.CodecInfo(
  123. name='utf-16',
  124. encode=encode,
  125. decode=decode,
  126. incrementalencoder=IncrementalEncoder,
  127. incrementaldecoder=IncrementalDecoder,
  128. streamreader=StreamReader,
  129. streamwriter=StreamWriter,
  130. )