utf_32.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. """
  2. Python 'utf-32' Codec
  3. """
  4. import codecs, sys
### Codec APIs

# Stateless encoder: a direct alias of ``codecs.utf_32_encode``, so it takes
# the same arguments and returns the same result as that function.
encode = codecs.utf_32_encode
  7. def decode(input, errors='strict'):
  8. return codecs.utf_32_decode(input, errors, True)
  9. class IncrementalEncoder(codecs.IncrementalEncoder):
  10. def __init__(self, errors='strict'):
  11. codecs.IncrementalEncoder.__init__(self, errors)
  12. self.encoder = None
  13. def encode(self, input, final=False):
  14. if self.encoder is None:
  15. result = codecs.utf_32_encode(input, self.errors)[0]
  16. if sys.byteorder == 'little':
  17. self.encoder = codecs.utf_32_le_encode
  18. else:
  19. self.encoder = codecs.utf_32_be_encode
  20. return result
  21. return self.encoder(input, self.errors)[0]
  22. def reset(self):
  23. codecs.IncrementalEncoder.reset(self)
  24. self.encoder = None
  25. def getstate(self):
  26. # state info we return to the caller:
  27. # 0: stream is in natural order for this platform
  28. # 2: endianness hasn't been determined yet
  29. # (we're never writing in unnatural order)
  30. return (2 if self.encoder is None else 0)
  31. def setstate(self, state):
  32. if state:
  33. self.encoder = None
  34. else:
  35. if sys.byteorder == 'little':
  36. self.encoder = codecs.utf_32_le_encode
  37. else:
  38. self.encoder = codecs.utf_32_be_encode
  39. class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
  40. def __init__(self, errors='strict'):
  41. codecs.BufferedIncrementalDecoder.__init__(self, errors)
  42. self.decoder = None
  43. def _buffer_decode(self, input, errors, final):
  44. if self.decoder is None:
  45. (output, consumed, byteorder) = \
  46. codecs.utf_32_ex_decode(input, errors, 0, final)
  47. if byteorder == -1:
  48. self.decoder = codecs.utf_32_le_decode
  49. elif byteorder == 1:
  50. self.decoder = codecs.utf_32_be_decode
  51. elif consumed >= 4:
  52. raise UnicodeError("UTF-32 stream does not start with BOM")
  53. return (output, consumed)
  54. return self.decoder(input, self.errors, final)
  55. def reset(self):
  56. codecs.BufferedIncrementalDecoder.reset(self)
  57. self.decoder = None
  58. def getstate(self):
  59. # additional state info from the base class must be None here,
  60. # as it isn't passed along to the caller
  61. state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
  62. # additional state info we pass to the caller:
  63. # 0: stream is in natural order for this platform
  64. # 1: stream is in unnatural order
  65. # 2: endianness hasn't been determined yet
  66. if self.decoder is None:
  67. return (state, 2)
  68. addstate = int((sys.byteorder == "big") !=
  69. (self.decoder is codecs.utf_32_be_decode))
  70. return (state, addstate)
  71. def setstate(self, state):
  72. # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
  73. codecs.BufferedIncrementalDecoder.setstate(self, state)
  74. state = state[1]
  75. if state == 0:
  76. self.decoder = (codecs.utf_32_be_decode
  77. if sys.byteorder == "big"
  78. else codecs.utf_32_le_decode)
  79. elif state == 1:
  80. self.decoder = (codecs.utf_32_le_decode
  81. if sys.byteorder == "big"
  82. else codecs.utf_32_be_decode)
  83. else:
  84. self.decoder = None
  85. class StreamWriter(codecs.StreamWriter):
  86. def __init__(self, stream, errors='strict'):
  87. self.encoder = None
  88. codecs.StreamWriter.__init__(self, stream, errors)
  89. def reset(self):
  90. codecs.StreamWriter.reset(self)
  91. self.encoder = None
  92. def encode(self, input, errors='strict'):
  93. if self.encoder is None:
  94. result = codecs.utf_32_encode(input, errors)
  95. if sys.byteorder == 'little':
  96. self.encoder = codecs.utf_32_le_encode
  97. else:
  98. self.encoder = codecs.utf_32_be_encode
  99. return result
  100. else:
  101. return self.encoder(input, errors)
  102. class StreamReader(codecs.StreamReader):
  103. def reset(self):
  104. codecs.StreamReader.reset(self)
  105. try:
  106. del self.decode
  107. except AttributeError:
  108. pass
  109. def decode(self, input, errors='strict'):
  110. (object, consumed, byteorder) = \
  111. codecs.utf_32_ex_decode(input, errors, 0, False)
  112. if byteorder == -1:
  113. self.decode = codecs.utf_32_le_decode
  114. elif byteorder == 1:
  115. self.decode = codecs.utf_32_be_decode
  116. elif consumed>=4:
  117. raise UnicodeError("UTF-32 stream does not start with BOM")
  118. return (object, consumed)
  119. ### encodings module API
  120. def getregentry():
  121. return codecs.CodecInfo(
  122. name='utf-32',
  123. encode=encode,
  124. decode=decode,
  125. incrementalencoder=IncrementalEncoder,
  126. incrementaldecoder=IncrementalDecoder,
  127. streamreader=StreamReader,
  128. streamwriter=StreamWriter,
  129. )