$B4A;z$N%j%s%/9=B$(B

Yoshi Fujiwara yfujiwar @ crl.go.jp
2002年 11月 12日 (火) 23:30:55 JST


藤原です.

>> また、以前藤原さんが作っていた漢字間のリンクのデータは、どのように

今お茶の水に来ていて,動作確認できないのですが,一応使ったプログラムを
下記に並べます.これは守岡さんの作ってくれたコードを改変したものです.
このままでは最新版UTF-2000では動かないかも(^^;).

順番にlispで評価させた記憶があります.最後にはucs番号に落としました.

部品 漢字1 漢字2 ... 漢字n

-503280443 5 15479313 15480233 15481497 15481575 15482156
-503280433 3 15479217 15479417 15480606
.....
とかです(たしか).

# OpenGLで江渡さんに質問があるのですが,メールで聞けますか.

------------------------------

; --------------------------------------------------
; char2ucs converts char to ucs if available
; --------------------------------------------------
(defun char2ucs (char)
  (if (characterp char)
      (if (get-char-attribute char 'ucs)
          char
        (let ((ucs (or (get-char-attribute char '=>ucs)
                       (get-char-attribute char '->ucs))))
          (if ucs
              (decode-char 'ucs ucs)
            char)))
    char))

; --------------------------------------------------
; check if a component is a part of a character
; --------------------------------------------------
(defun ideographic-member (component char)
  (setq char (char2ucs char))
  (if (char-ref= char component)
      t
    (let ((str
           (cond ((characterp char)
                  ;; character
                  (get-char-attribute char 'ideographic-structure))
                 ((listp char)
                  (if (listp (car char))
                      ;; char-spec
                      (cdr (assq 'ideographic-structure char))
                    ;; char-ref
                    (setq char (plist-get char :char))
                    (cond ((characterp char)
                           ;; char
                           (get-char-attribute
                            char
                            'ideographic-structure))
                          ((listp char)
                           (if (listp (car char))
                               ;; char-ref
                               (cdr
                                (assq 'ideographic-structure char)))
                           )))))))
      (when str
        (member* component str :test #'ideographic-member)))))

; --------------------------------------------------
; decompose a character recursively into components
; --------------------------------------------------
(defun ideographic-structure-get-parts (char &optional terminal-only
                                             include-non-terminal-myself)
  (setq char (char2ucs char))
  (let ((struct nil) dest ret (char0 char))
    (if (characterp char)
        (setq struct (get-char-attribute char 'ideographic-structure))
      (if (assq 'ideographic-structure char)
          (setq struct (cdr (assq 'ideographic-structure char)))))
    (cond (struct
           (dolist (component
                    (cdr struct) ; we expect the first element is an operator
                    )
             (if (characterp component)
                 (setq dest
                       (union dest
                              (ideographic-structure-get-parts
                               component terminal-only (not terminal-only))))
               (if (char-ref-p component)
                   (setq component (plist-get component :char)))
               (if (setq ret (cdr (assq 'ideographic-structure component)))
                   (dolist (tmp (cdr ret))
                     (setq dest
                           (union dest
                                  (ideographic-structure-get-parts
                                   tmp terminal-only (not terminal-only)))))
                 (if (setq component (find-char component))
                     (setq dest
                           (union dest
                                  (ideographic-structure-get-parts
                                   component terminal-only (not terminal-only))))))))
           (when (and include-non-terminal-myself
                      (not (memq char dest)))
             (push char dest))
           (delq (find-char '((chinese-big5-cdp . #x8DF5))) dest))  ; delete strange component
          (t
           (if (characterp char)
               (list char)
             (if (setq char (find-char char))
                 (setq dest
                       (union dest
                              (ideographic-structure-get-parts
                               char terminal-only (not terminal-only))))
               (error (format "my-error: %c" char0))))))))

; --------------------------------------------------
; return a list of JIS characters with ideographic radicals
; which have a component part
; --------------------------------------------------
(defun list-jis-with-part (part)
  (let (res)
    (map-char-attribute
     (lambda (char val)
       (if val
           (let (tmp)
             (setq tmp (get-char-attribute char 'ideographic-structure))
             (if tmp
                 (if (member* part tmp :test #'ideographic-member)
                     (push char res)))))
       nil)
     'japanese-jisx0208)
    res))

; --------------------------------------------------
; main programs
; --------------------------------------------------
; list of components
(defvar comps nil)

; For JIS characters with ideographic radicals
; get all the components (without duplication)
(let (dest)
  (map-char-attribute
   (lambda (char val)
     (let (tmp)
       (if val
           (if (get-char-attribute char 'ideographic-radical)
               (progn
                 (setq tmp (ideographic-structure-get-parts char 'terminal-only))
                 (if (> (length tmp) 1)
                     (setq dest
                           (union tmp dest)))))))
     nil)
   'japanese-jisx0208)
  (setq comps dest))

; Then for each such component, return
; char-id #(characters with the component) (char-id's of such characters)
(dolist (x comps)
  (let (list-jis tmp nlist-jis)
    (setq list-jis (list-jis-with-part x))
    (setq tmp (length list-jis))
    (setq nlist-jis nil)
    (dolist (y list-jis)
      (setq nlist-jis (cons (char-int y) nlist-jis)))
    (if (> tmp 1)
        (progn
          (insert (format "%d %d" (char-int x) tmp))
          (dolist (z nlist-jis)
            (insert (format " %d" z)))
          (insert (format "\n"))))))


以上




More information about the CHISE-ja mailing list