;;; pure-cs-japanese.el --- Japanese processor for PURE module -*- coding: ctext; -*- ;; Copyright (C) 2000-2001 by Project Pure. ;; Author: SHIMADA Mitsunobu ;; Keywords: PURE, coding-system, ISO-2022, JISX0201, $BH>3Q%+%J(B ;; $Id: pure-cs-japanese.el,v 1.3 2001/07/28 16:36:15 simm Exp $ ;; This file is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This file is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. ;;; Commentary: ;; PURE means "Primitive Universal Relay-chat Environment" ;; CS means coding-system ;; This module is derived from pure-cs-kana.el and pure-cs-detect.el ;; (pure-cs-kana.el is derived from irchat-pj-jisx0201.el) ;;; Code: (defvar pure-cs-japanese-verbose t "If non-nil, output message when non-ISO-2022-JP string was given.") (defconst pure-cs-japanese-kana-twobytes-character-alist '(("(I6^(B" . "$B%,(B") ("(I7^(B" . "$B%.(B") ("(I8^(B" . "$B%0(B") ("(I9^(B" . "$B%2(B") ("(I:^(B" . "$B%4(B") ("(I;^(B" . "$B%6(B") ("(I<^(B" . "$B%8(B") ("(I=^(B" . "$B%:(B") ("(I>^(B" . "$B%<(B") ("(I?^(B" . "$B%>(B") ("(I@^(B" . "$B%@(B") ("(IA^(B" . "$B%B(B") ("(IB^(B" . "$B%E(B") ("(IC^(B" . "$B%G(B") ("(ID^(B" . "$B%I(B") ("(IJ^(B" . "$B%P(B") ("(IK^(B" . "$B%S(B") ("(IL^(B" . "$B%V(B") ("(IM^(B" . "$B%Y(B") ("(IN^(B" . "$B%\(B") ("(IJ_(B" . "$B%Q(B") ("(IK_(B" . "$B%T(B") ("(IL_(B" . "$B%W(B") ("(IM_(B" . "$B%Z(B") ("(IN_(B" . "$B%](B") ("(I3^(B" . "$B%t(B"))) (defconst pure-cs-japanese-kana-onebyte-character-alist '(("(I'(B" . "$B%!(B") ("(I((B" . "$B%#(B") ("(I)(B" . "$B%%(B") ("(I*(B" . "$B%'(B") ("(I+(B" . "$B%)(B") ("(I,(B" . "$B%c(B") ("(I-(B" . "$B%e(B") ("(I.(B" . "$B%g(B") ("(I/(B" . "$B%C(B") ("(I1(B" . "$B%"(B") ("(I2(B" . "$B%$(B") ("(I3(B" . "$B%&(B") ("(I4(B" . "$B%((B") ("(I5(B" . "$B%*(B") ("(I6(B" . "$B%+(B") ("(I7(B" . "$B%-(B") ("(I8(B" . "$B%/(B") ("(I9(B" . "$B%1(B") ("(I:(B" . "$B%3(B") ("(I;(B" . "$B%5(B") ("(I<(B" . "$B%7(B") ("(I=(B" . "$B%9(B") ("(I>(B" . "$B%;(B") ("(I?(B" . "$B%=(B") ("(I@(B" . "$B%?(B") ("(IA(B" . "$B%A(B") ("(IB(B" . "$B%D(B") ("(IC(B" . "$B%F(B") ("(ID(B" . "$B%H(B") ("(IE(B" . "$B%J(B") ("(IF(B" . "$B%K(B") ("(IG(B" . "$B%L(B") ("(IH(B" . "$B%M(B") ("(II(B" . "$B%N(B") ("(IJ(B" . "$B%O(B") ("(IK(B" . "$B%R(B") ("(IL(B" . "$B%U(B") ("(IM(B" . "$B%X(B") ("(IN(B" . "$B%[(B") ("(IO(B" . "$B%^(B") ("(IP(B" . "$B%_(B") ("(IQ(B" . "$B%`(B") ("(IR(B" . "$B%a(B") ("(IS(B" . "$B%b(B") ("(IT(B" . "$B%d(B") ("(IU(B" . "$B%f(B") ("(IV(B" . "$B%h(B") ("(IW(B" . "$B%i(B") ("(IX(B" . "$B%j(B") ("(IY(B" . "$B%k(B") ("(IZ(B" . "$B%l(B") ("(I[(B" . "$B%m(B") ("(I\(B" . "$B%o(B") ("(I&(B" . "$B%r(B") ("(I](B" . "$B%s(B") ("(I$(B" . "$B!"(B") ("(I!(B" . "$B!#(B") ("(I%(B" . "$B!&(B") ("(I^(B" . "$B!+(B") ("(I_(B" . "$B!,(B") ("(I0(B" . "$B!<(B") ("(I"(B" . "$B!V(B") ("(I#(B" . "$B!W(B"))) (defun pure-cs-japanese-kana-convert-region (beg end) "Convert JISX0201 kana code into JISX0208" (interactive "r") (let (ch) (save-excursion (goto-char beg) (save-restriction (while (re-search-forward "\\ck" end t) (setq ch (char-before)) (cond ((and (= ch ?(I%(B) (looking-at "(I%(B")) (delete-backward-char 1) (if (looking-at "(I%%(B") (progn (delete-char 2) (insert "$B!D(B")) (delete-char 1) (insert "$B!E(B"))) ((looking-at "[(I^_(B]") (backward-char) (setq ch (cdr (assoc (buffer-substring (point) (+ 2 (point))) pure-cs-japanese-kana-twobytes-character-alist))) (if ch (progn (delete-char 2) (insert ch)) (setq ch (cdr (assoc (buffer-substring (point) (1+ (point))) pure-cs-japanese-kana-onebyte-character-alist))) (insert (or ch " ")))) (t (setq ch (cdr (assoc (buffer-substring (1- (point)) (point)) pure-cs-japanese-kana-onebyte-character-alist))) (delete-backward-char 1) (insert (or ch " "))))))))) ;; ;; Detect Japanese SHIFT_JIS or EUC-JP ;; (defsubst pure-cs-japanese-sjis-score (ch sflag) (cond ((and sflag (<= 64 ch) (<= ch 126)) '(1 . nil)) ((and sflag (<= 128 ch) (<= ch 252)) '(1 . nil)) (sflag '(-1 . nil)) ((or (= 130 ch) (= 131 ch)) '(2 . t)) ((and (<= 129 ch) (<= ch 159)) '(1 . t)) ((and (<= 224 ch) (<= ch 239)) '(1 . t)) (t '(0 . nil)))) (defsubst pure-cs-japanese-euc-score (ch eflag) (cond ((and (eq eflag '8f) (<= 161 ch) (<= ch 254)) '(1 . t)) ((eq eflag '8f) '(-1 . nil)) ((and (eq eflag '8e) (<= 161 ch) (<= ch 223)) '(1 . nil)) ((eq eflag '8e) '(-1 . nil)) ((and eflag (<= 161 ch) (<= ch 254)) '(1 . nil)) (eflag '(-1 . nil)) ((= ch 143) '(1 . 8f)) ((= ch 142) '(1 . 8e)) ((= ch 255) '(-1 . nil)) ((and (<= 127 ch) (<= ch 160)) '(-1 . nil)) ((or (= 164 ch) (= 165 ch)) '(2 . t)) ((and (<= 161 ch) (<= ch 254)) '(1 . t)) (t '(0 . nil)))) (defun pure-cs-japanese-detect-region (beg end) "Detect Japanese, SHIFT_JIS or EUC-JP. This function is one of the candidates, `pure-cs-detect-region' and `pure-cs-detect-string'." (unwind-protect (save-restriction (narrow-to-region beg end) (let (ch result (eflag nil) (sflag nil) (escore 0) (sscore 0)) (save-excursion (goto-char beg) (while (< (point) end) (setq ch (following-char)) ;; add SJIS score (setq result (pure-cs-japanese-sjis-score ch sflag) sscore (+ sscore (car result)) sflag (cdr result)) ;; add EUC-JP score (setq result (pure-cs-japanese-euc-score ch eflag) escore (+ escore (car result)) eflag (cdr result)) ;; next (forward-char 1))) (if pure-cs-japanese-verbose (message "PURE: %s code detected" (if (< sscore escore) "EUC-JP" "SHIFT JIS"))) (if (< sscore escore) 'euc-jp 'shift_jis))) (pure-cs-buffer-multibyte))) ;;; That's all (provide 'pure-cs-japanese) ;;; pure-cs-japanese.el ends here