$B4A;z$N%j%s%/9=B$(B
Yoshi Fujiwara
yfujiwar @ crl.go.jp
2002年 11月 12日 (火) 23:30:55 JST
藤原です.
>> また、以前藤原さんが作っていた漢字間のリンクのデータは、どのように
今お茶の水に来ていて,動作確認できないのですが,一応使ったプログラムを
下記に並べます.これは守岡さんの作ってくれたコードを改変したものです.
このままでは最新版UTF-2000では動かないかも(^^;).
順番にlispで評価させた記憶があります.最後にはucs番号に落としました.
部品 漢字1 漢字2 ... 漢字n
-503280443 5 15479313 15480233 15481497 15481575 15482156
-503280433 3 15479217 15479417 15480606
.....
とかです(たしか).
# OpenGLで江渡さんに質問があるのですが,メールで聞けますか.
------------------------------
; --------------------------------------------------
; char2ucs converts char to ucs if available
; --------------------------------------------------
(defun char2ucs (char)
(if (characterp char)
(if (get-char-attribute char 'ucs)
char
(let ((ucs (or (get-char-attribute char '=>ucs)
(get-char-attribute char '->ucs))))
(if ucs
(decode-char 'ucs ucs)
char)))
char))
; --------------------------------------------------
; check if a component is a part of a character
; --------------------------------------------------
(defun ideographic-member (component char)
(setq char (char2ucs char))
(if (char-ref= char component)
t
(let ((str
(cond ((characterp char)
;; character
(get-char-attribute char 'ideographic-structure))
((listp char)
(if (listp (car char))
;; char-spec
(cdr (assq 'ideographic-structure char))
;; char-ref
(setq char (plist-get char :char))
(cond ((characterp char)
;; char
(get-char-attribute
char
'ideographic-structure))
((listp char)
(if (listp (car char))
;; char-ref
(cdr
(assq 'ideographic-structure char)))
)))))))
(when str
(member* component str :test #'ideographic-member)))))
; --------------------------------------------------
; decompose a character recursively into components
; --------------------------------------------------
(defun ideographic-structure-get-parts (char &optional terminal-only
include-non-terminal-myself)
(setq char (char2ucs char))
(let ((struct nil) dest ret (char0 char))
(if (characterp char)
(setq struct (get-char-attribute char 'ideographic-structure))
(if (assq 'ideographic-structure char)
(setq struct (cdr (assq 'ideographic-structure char)))))
(cond (struct
(dolist (component
(cdr struct) ; we expect the first element is an operator
)
(if (characterp component)
(setq dest
(union dest
(ideographic-structure-get-parts
component terminal-only (not terminal-only))))
(if (char-ref-p component)
(setq component (plist-get component :char)))
(if (setq ret (cdr (assq 'ideographic-structure component)))
(dolist (tmp (cdr ret))
(setq dest
(union dest
(ideographic-structure-get-parts
tmp terminal-only (not terminal-only)))))
(if (setq component (find-char component))
(setq dest
(union dest
(ideographic-structure-get-parts
component terminal-only (not terminal-only))))))))
(when (and include-non-terminal-myself
(not (memq char dest)))
(push char dest))
(delq (find-char '((chinese-big5-cdp . #x8DF5))) dest)) ; delete strange component
(t
(if (characterp char)
(list char)
(if (setq char (find-char char))
(setq dest
(union dest
(ideographic-structure-get-parts
char terminal-only (not terminal-only))))
(error (format "my-error: %c" char0))))))))
; --------------------------------------------------
; return a list of JIS characters with ideographic radicals
; which have a component part
; --------------------------------------------------
(defun list-jis-with-part (part)
(let (res)
(map-char-attribute
(lambda (char val)
(if val
(let (tmp)
(setq tmp (get-char-attribute char 'ideographic-structure))
(if tmp
(if (member* part tmp :test #'ideographic-member)
(push char res)))))
nil)
'japanese-jisx0208)
res))
; --------------------------------------------------
; main programs
; --------------------------------------------------
; list of components
(defvar comps nil)
; For JIS characters with ideographic radicals
; get all the components (without duplication)
(let (dest)
(map-char-attribute
(lambda (char val)
(let (tmp)
(if val
(if (get-char-attribute char 'ideographic-radical)
(progn
(setq tmp (ideographic-structure-get-parts char 'terminal-only))
(if (> (length tmp) 1)
(setq dest
(union tmp dest)))))))
nil)
'japanese-jisx0208)
(setq comps dest))
; Then for each such component, return
; char-id #(characters with the component) (char-id's of such characters)
(dolist (x comps)
(let (list-jis tmp nlist-jis)
(setq list-jis (list-jis-with-part x))
(setq tmp (length list-jis))
(setq nlist-jis nil)
(dolist (y list-jis)
(setq nlist-jis (cons (char-int y) nlist-jis)))
(if (> tmp 1)
(progn
(insert (format "%d %d" (char-int x) tmp))
(dolist (z nlist-jis)
(insert (format " %d" z)))
(insert (format "\n"))))))
以上
More information about the CHISE-ja
mailing list