# A codec for ISO-2022-JP-1 [RFC2237]
# Tamito KAJIYAMA <18 December 2000>

import codecs, japanese.c._japanese_codecs
import re

US_ASCII      = 1
JISX0201_1976 = 2
JISX0208_1978 = 3
JISX0208_1983 = 4
JISX0212_1990 = 5

CHARSETS = {
    "\033(B": US_ASCII,
    "\033(J": JISX0201_1976,
    "\033$@": JISX0208_1978,
    "\033$B": JISX0208_1983,
    "\033$(D": JISX0212_1990,
}

DESIGNATIONS = {}
for k, v in CHARSETS.items():
    DESIGNATIONS[v] = k

re_designations = re.compile("\033(\\([BJ]|\\$[@B]|\\$\\(D)")

class Codec(codecs.Codec):
    encode = japanese.c._japanese_codecs.iso_2022_jp_1_encode
    decode = japanese.c._japanese_codecs.iso_2022_jp_1_decode

class StreamWriter(Codec, codecs.StreamWriter):
    pass

class StreamReader(Codec, codecs.StreamReader):
    def __init__(self, stream, errors='strict'):
        codecs.StreamReader.__init__(self, stream, errors)
        self.data = ''
        self.charset = US_ASCII
    def _read(self, func, size):
        if size == 0:
            return u''
        if size is None or size < 0:
            data = self.data + func()
        else:
            data = self.data + func(max(size, 8) - len(self.data))
        self.data = ''
        if self.charset != US_ASCII:
            data = DESIGNATIONS[self.charset] + data
        pos = data.rfind("\033")
        if pos >= 0 and not re_designations.match(data, pos):
            # data ends on the way of an escape sequence
            data, self.data = data[:pos], data[pos:]
            pos = data.rfind("\033")
        if pos >= 0:
            match = re_designations.match(data, pos)
            if not match:
                raise UnicodeError, "unknown designation"
            self.charset = CHARSETS[match.group()]
            if self.charset in [JISX0208_1978, JISX0208_1983, JISX0212_1990] and \
               (len(data) - match.end()) % 2 == 1:
                data, self.data = data[:-1], data[-1]
            if self.charset != US_ASCII:
                data = data + DESIGNATIONS[US_ASCII]
        return self.decode(data, self.errors)[0]
    def read(self, size=-1):
        return self._read(self.stream.read, size)
    def readline(self, size=-1):
        return self._read(self.stream.readline, size)
    def readlines(self, size=-1):
        data = self._read(self.stream.read, size)
        buffer = []
        end = 0
        while 1:
            pos = data.find(u'\n', end)
            if pos < 0:
                if end < len(data):
                    buffer.append(data[end:])
                break
            buffer.append(data[end:pos+1])
            end = pos+1
        return buffer
    def reset(self):
        self.data = ''

### encodings module API

def getregentry():
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)


syntax highlighted by Code2HTML, v. 0.9.1