;; Warning: this is an htmlized version!
;; The original is here, and the conversion rules are here.
;; This file:
;;   http://angg.twu.net/elisp/derawify.el.html
;;   http://angg.twu.net/elisp/derawify.el
;;           (find-angg "elisp/derawify.el")
;; Author: Eduardo Ochs <eduardoochs@gmail.com>
;; Date: 2022feb12
;; Public domain.
;;
;; I still have many files that use the "raw-text-unix" encoding, which
;; is explained here:
;;
;;   (find-elnode "Coding System Basics")
;;
;; When I copy text with chars in the range 128-255 (0x80-0xff) from
;; these files to files in the "utf-8-unix" encoding these chars are
;; converted to the range 0x3fff80-0x3fffff, and when I try to save
;; the utf-8 file with those characters Emacs creates a temporary
;; buffer called "*Warning*" with a message like this one:
;;
;;   These default coding systems were tried to encode the following
;;   problematic characters in the buffer `ooo':
;;     Coding System           Pos  Codepoint  Char
;;     utf-8                    11  #x3FFFE9   é
;;                              34  #x3FFFED   í
;;                             119  #x3FFFE3   ã
;;                             193  #x3FFFED   í
;;                             264  #x3FFFED   í
;;                             ...
;;
;;   However, each of them encountered characters it couldn't encode:
;;     utf-8 cannot encode these: é í ã í í í
;;
;;   Click on a character (or switch to this window by `M-o'
;;   and select the characters by RET) to jump to the place it appears,
;;   where `C-u C-x =' will give information about it.
;;
;;   Select one of the safe coding systems listed below,
;;   or cancel the writing with C-g and edit the buffer
;;   to remove or modify the problematic characters,
;;   or specify any other coding system (and risk losing
;;   the problematic characters).
;;
;;     raw-text no-conversion
;;
;; I never found the "right" way to make Emacs convert the chars in
;; the range 0x3fff80-0x3fffff (the "bad" range) to the range
;; 0x80-0xff (the "good" range), so I wrote this hack.
;; If we run `M-x derawify' in a buffer it will find all the "bad"
;; characters in that buffer - or, more precisely: the "bad"
;; characters in the accessible portion of the current buffer - and
;; it will change each of them to the corresponding "good" character
;; and mark each one of the converted characters with the face
;; `derawify-face'.

;; Todo: use rx.
;;
;; (find-epp (rx (in 2000 4000)))
;; (rx (in 32 64))
;; (rx (in #x3fff80 #x3fffff))
;; (insert "\x3fff80-\x3fffff")

;; The face used to mark converted characters: no foreground of its
;; own (nil) on an "OliveDrab" background.
(make-face 'derawify-face)
(set-face-foreground 'derawify-face nil)
(set-face-background 'derawify-face "OliveDrab")

(defun derawify--replace-char-at-point (char)
  "Replace the character at point with CHAR masked to its low 8 bits.
The replacement is inserted with the face `derawify-face' and point
is left right after it.  This is an internal helper: it does not
check what CHAR is, the callers do."
  (delete-region (point) (1+ (point)))
  (insert (propertize (char-to-string (logand 255 char))
                      'face 'derawify-face)))

(defun derawify-here ()
  "Convert the \"bad\" character at point to the corresponding \"good\" one.
A \"bad\" character is one in the range 0x3fff80-0x3fffff; it is
replaced by the character in the range 0x80-0xff that has the same
low 8 bits, marked with the face `derawify-face', and point is left
after the new character.

Signal a `user-error', without changing the buffer, when point is at
the end of the accessible portion of the buffer or when the character
at point is not in the \"bad\" range.  Without this check an ordinary
character would be silently replaced by an unrelated one."
  (interactive)
  (let ((char (char-after)))
    ;; `char-after' returns nil at the end of the accessible portion.
    (unless (and char (<= #x3fff80 char #x3fffff))
      (user-error "No character in the range 0x3fff80-0x3fffff at point"))
    (derawify--replace-char-at-point char)))

(defun derawify ()
  "Convert all characters in the range 0x3fff80-0x3fffff to the range 0x80-0xff.
All the \"bad\" characters in the accessible portion of the current
buffer are converted to the corresponding \"good\" characters and
marked with the face `derawify-face'.  Point is preserved."
  (interactive)
  (save-excursion
    (goto-char (point-min))
    (while (search-forward-regexp "[\x3fff80-\x3fffff]" nil t)
      ;; The search leaves point after the match; step back onto the
      ;; matched character and replace it.  After the replacement
      ;; point is after the new character, so the loop advances.
      (goto-char (match-beginning 0))
      (derawify--replace-char-at-point (char-after)))))



;; ;; Old version:
;; ;;
;; ;; (find-efunctiondescr 'search-forward-regexp)
;; ;; (replace-match NEWTEXT &optional FIXEDCASE LITERAL STRING SUBEXP)
;;
;; (defun derawify-here-p (pos)
;;   (<= #x3fff80 (char-after pos) #x3fffff))
;;
;; (defun derawify-here-find-first (b e)
;;   (cl-loop for pos from b to e
;;            do (if (derawify-here-p pos)
;;                   (cl-return pos))))
;;
;; (defun derawify-here-find-first (b e)
;;   (goto-char b)
;;   (search-forward-regexp "[\x3fff80-\x3fffff]" e t))
;;
;; (defun derawify-here-goto-first (b e)
;;   (interactive "r")
;;   (goto-char (derawify-here-find-first b e)))
;;
;; (defun derawify-next (e)
;;   (let ((pos (derawify-here-find-first b e)))
;;     (if pos
;;         (progn (goto-char pos)
;;                (derawify-here)
;;                t))))
;;
;; (defun derawify-region (b e)
;;   "This is like `derawify' but operates only in the region from B to E."
;;   (interactive "r")
;;   (save-excursion
;;     (goto-char b)
;;     (while (derawify-next e))))
;;
;; (defun derawify ()
;;   "Convert all characters in the range 0x3fff80-0x3fffff to the range 0x80-0xff.
;; All the \"bad\" characters in accessible portion of the current
;; buffer are converted to corresponding \"good\" characters and
;; marked with the face `derawify-face'."
;;   (interactive)
;;   (derawify-region (point-min) (1- (point-max))))



;; (defun e () (interactive) (find-angg "elisp/derawify.el"))

;; Local Variables:
;; coding:  utf-8-unix
;; End: