yarv-diff:323
From: ko1 atdot.net
Date: 13 Apr 2006 08:18:51 -0000
Subject: [yarv-diff:323] r490 - in trunk: . ext ext/nkf ext/nkf/lib ext/nkf/nkf-utf8 ext/socket ext/win32ole ext/win32ole/sample ext/win32ole/tests lib lib/irb lib/net lib/rdoc/parsers lib/rdoc/ri lib/test/unit/collector lib/webrick/httpservlet test/digest test/pathname test/ruby win32
Author: ko1
Date: 2006-04-13 17:18:48 +0900 (Thu, 13 Apr 2006)
New Revision: 490
Modified:
trunk/ChangeLog
trunk/bignum.c
trunk/dir.c
trunk/enumerator.c
trunk/ext/.document
trunk/ext/extmk.rb
trunk/ext/nkf/lib/kconv.rb
trunk/ext/nkf/nkf-utf8/nkf.c
trunk/ext/nkf/nkf-utf8/utf8tbl.c
trunk/ext/nkf/nkf.c
trunk/ext/nkf/test.rb
trunk/ext/socket/.cvsignore
trunk/ext/win32ole/sample/excel2.rb
trunk/ext/win32ole/tests/testOLEMETHOD.rb
trunk/ext/win32ole/tests/testOLEPARAM.rb
trunk/ext/win32ole/tests/testOLETYPE.rb
trunk/ext/win32ole/tests/testOLETYPELIB.rb
trunk/ext/win32ole/tests/testOLEVARIABLE.rb
trunk/ext/win32ole/tests/testOLEVARIANT.rb
trunk/ext/win32ole/tests/testWIN32OLE.rb
trunk/ext/win32ole/tests/testall.rb
trunk/ext/win32ole/win32ole.c
trunk/gc.c
trunk/instruby.rb
trunk/io.c
trunk/lib/delegate.rb
trunk/lib/fileutils.rb
trunk/lib/find.rb
trunk/lib/irb/ruby-lex.rb
trunk/lib/mkmf.rb
trunk/lib/net/http.rb
trunk/lib/open-uri.rb
trunk/lib/pathname.rb
trunk/lib/rational.rb
trunk/lib/rdoc/parsers/parse_rb.rb
trunk/lib/rdoc/ri/ri_paths.rb
trunk/lib/resolv.rb
trunk/lib/test/unit/collector/objectspace.rb
trunk/lib/webrick/httpservlet/cgihandler.rb
trunk/math.c
trunk/mkconfig.rb
trunk/object.c
trunk/oniguruma.h
trunk/pack.c
trunk/parse.y
trunk/re.c
trunk/re.h
trunk/regcomp.c
trunk/regerror.c
trunk/regparse.c
trunk/ruby.h
trunk/rubytest.rb
trunk/runruby.rb
trunk/string.c
trunk/test/digest/test_digest.rb
trunk/test/pathname/test_pathname.rb
trunk/test/ruby/envutil.rb
trunk/test/ruby/test_float.rb
trunk/test/ruby/test_pack.rb
trunk/time.c
trunk/util.c
trunk/version.h
trunk/win32/mkexports.rb
trunk/win32/resource.rb
trunk/win32/win32.c
Log:
* bignum.c : import ruby 1.9 HEAD (Ruby 1.9.0 2006-04-08)
* dir.c : ditto
* enumerator.c : ditto
* ext/.document : ditto
* ext/extmk.rb : ditto
* ext/nkf/lib/kconv.rb : ditto
* ext/nkf/nkf-utf8/nkf.c : ditto
* ext/nkf/nkf-utf8/utf8tbl.c : ditto
* ext/nkf/nkf.c : ditto
* ext/nkf/test.rb : ditto
* ext/socket/.cvsignore : ditto
* ext/win32ole/sample/excel2.rb : ditto
* ext/win32ole/tests/testOLEMETHOD.rb : ditto
* ext/win32ole/tests/testOLEPARAM.rb : ditto
* ext/win32ole/tests/testOLETYPE.rb : ditto
* ext/win32ole/tests/testOLETYPELIB.rb : ditto
* ext/win32ole/tests/testOLEVARIABLE.rb : ditto
* ext/win32ole/tests/testOLEVARIANT.rb : ditto
* ext/win32ole/tests/testWIN32OLE.rb : ditto
* ext/win32ole/tests/testall.rb : ditto
* ext/win32ole/win32ole.c : ditto
* gc.c : ditto
* instruby.rb : ditto
* io.c : ditto
* lib/delegate.rb : ditto
* lib/fileutils.rb : ditto
* lib/find.rb : ditto
* lib/irb/ruby-lex.rb : ditto
* lib/mkmf.rb : ditto
* lib/net/http.rb : ditto
* lib/open-uri.rb : ditto
* lib/pathname.rb : ditto
* lib/rational.rb : ditto
* lib/rdoc/parsers/parse_rb.rb : ditto
* lib/rdoc/ri/ri_paths.rb : ditto
* lib/resolv.rb : ditto
* lib/test/unit/collector/objectspace.rb : ditto
* lib/webrick/httpservlet/cgihandler.rb : ditto
* math.c : ditto
* mkconfig.rb : ditto
* object.c : ditto
* oniguruma.h : ditto
* pack.c : ditto
* parse.y : ditto
* re.c : ditto
* re.h : ditto
* regcomp.c : ditto
* regerror.c : ditto
* regparse.c : ditto
* ruby.h : ditto
* rubytest.rb : ditto
* runruby.rb : ditto
* string.c : ditto
* test/digest/test_digest.rb : ditto
* test/pathname/test_pathname.rb : ditto
* test/ruby/envutil.rb : ditto
* test/ruby/test_float.rb : ditto
* test/ruby/test_pack.rb : ditto
* time.c : ditto
* util.c : ditto
* version.h : ditto
* win32/mkexports.rb : ditto
* win32/resource.rb : ditto
* win32/win32.c : ditto
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ChangeLog 2006-04-13 08:18:48 UTC (rev 490)
@@ -4,6 +4,137 @@
# from Mon, 03 May 2004 01:24:19 +0900
#
+2006-04-13(Thu) 17:11:30 +0900 Koichi Sasada <ko1 atdot.net>
+
+ * bignum.c : import ruby 1.9 HEAD (Ruby 1.9.0 2006-04-08)
+
+ * dir.c : ditto
+
+ * enumerator.c : ditto
+
+ * ext/.document : ditto
+
+ * ext/extmk.rb : ditto
+
+ * ext/nkf/lib/kconv.rb : ditto
+
+ * ext/nkf/nkf-utf8/nkf.c : ditto
+
+ * ext/nkf/nkf-utf8/utf8tbl.c : ditto
+
+ * ext/nkf/nkf.c : ditto
+
+ * ext/nkf/test.rb : ditto
+
+ * ext/socket/.cvsignore : ditto
+
+ * ext/win32ole/sample/excel2.rb : ditto
+
+ * ext/win32ole/tests/testOLEMETHOD.rb : ditto
+
+ * ext/win32ole/tests/testOLEPARAM.rb : ditto
+
+ * ext/win32ole/tests/testOLETYPE.rb : ditto
+
+ * ext/win32ole/tests/testOLETYPELIB.rb : ditto
+
+ * ext/win32ole/tests/testOLEVARIABLE.rb : ditto
+
+ * ext/win32ole/tests/testOLEVARIANT.rb : ditto
+
+ * ext/win32ole/tests/testWIN32OLE.rb : ditto
+
+ * ext/win32ole/tests/testall.rb : ditto
+
+ * ext/win32ole/win32ole.c : ditto
+
+ * gc.c : ditto
+
+ * instruby.rb : ditto
+
+ * io.c : ditto
+
+ * lib/delegate.rb : ditto
+
+ * lib/fileutils.rb : ditto
+
+ * lib/find.rb : ditto
+
+ * lib/irb/ruby-lex.rb : ditto
+
+ * lib/mkmf.rb : ditto
+
+ * lib/net/http.rb : ditto
+
+ * lib/open-uri.rb : ditto
+
+ * lib/pathname.rb : ditto
+
+ * lib/rational.rb : ditto
+
+ * lib/rdoc/parsers/parse_rb.rb : ditto
+
+ * lib/rdoc/ri/ri_paths.rb : ditto
+
+ * lib/resolv.rb : ditto
+
+ * lib/test/unit/collector/objectspace.rb : ditto
+
+ * lib/webrick/httpservlet/cgihandler.rb : ditto
+
+ * math.c : ditto
+
+ * mkconfig.rb : ditto
+
+ * object.c : ditto
+
+ * oniguruma.h : ditto
+
+ * pack.c : ditto
+
+ * parse.y : ditto
+
+ * re.c : ditto
+
+ * re.h : ditto
+
+ * regcomp.c : ditto
+
+ * regerror.c : ditto
+
+ * regparse.c : ditto
+
+ * ruby.h : ditto
+
+ * rubytest.rb : ditto
+
+ * runruby.rb : ditto
+
+ * string.c : ditto
+
+ * test/digest/test_digest.rb : ditto
+
+ * test/pathname/test_pathname.rb : ditto
+
+ * test/ruby/envutil.rb : ditto
+
+ * test/ruby/test_float.rb : ditto
+
+ * test/ruby/test_pack.rb : ditto
+
+ * time.c : ditto
+
+ * util.c : ditto
+
+ * version.h : ditto
+
+ * win32/mkexports.rb : ditto
+
+ * win32/resource.rb : ditto
+
+ * win32/win32.c : ditto
+
+
2006-04-11(Tue) 11:26:53 +0900 Koichi Sasada <ko1 atdot.net>
* rb/yasm.rb : move to lib/yasm.rb
Modified: trunk/bignum.c
===================================================================
--- trunk/bignum.c 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/bignum.c 2006-04-13 08:18:48 UTC (rev 490)
@@ -2,8 +2,8 @@
bignum.c -
- $Author: akr $
- $Date: 2005/12/16 18:59:01 $
+ $Author: ocean $
+ $Date: 2006/03/01 10:06:02 $
created at: Fri Jun 10 00:48:55 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
@@ -94,7 +94,10 @@
static VALUE
bignorm(VALUE x)
{
- if (!FIXNUM_P(x)) {
+ if (FIXNUM_P(x)) {
+ return x;
+ }
+ else if (TYPE(x) == T_BIGNUM) {
long len = RBIGNUM(x)->len;
BDIGIT *ds = BDIGITS(x);
@@ -255,9 +258,7 @@
#define QUAD_SIZE 8
void
-rb_quad_pack(buf, val)
- char *buf;
- VALUE val;
+rb_quad_pack(char *buf, VALUE val)
{
long len;
@@ -283,9 +284,7 @@
#define BNEG(b) (RSHIFT(((BDIGIT*)b)[QUAD_SIZE/SIZEOF_BDIGITS-1],BITSPERDIG-1) != 0)
VALUE
-rb_quad_unpack(buf, sign)
- const char *buf;
- int sign;
+rb_quad_unpack(const char *buf, int sign)
{
VALUE big = bignew(QUAD_SIZE / SIZEOF_BDIGITS, 1);
Modified: trunk/dir.c
===================================================================
--- trunk/dir.c 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/dir.c 2006-04-13 08:18:48 UTC (rev 490)
@@ -2,8 +2,8 @@
dir.c -
- $Author: nobu $
- $Date: 2005/12/14 14:40:14 $
+ $Author: ocean $
+ $Date: 2006/03/01 10:06:03 $
created at: Wed Jan 5 09:51:01 JST 1994
Copyright (C) 1993-2003 Yukihiro Matsumoto
@@ -1745,7 +1745,7 @@
* File.fnmatch('*', '.profile', File::FNM_DOTMATCH) #=> true period by default.
* File.fnmatch('.*', '.profile') #=> true
*
- * rbfiles = File.join("**", "*.rb")
+ * rbfiles = '**' '/' '*.rb' # you don't have to do like this. just write in single string.
* File.fnmatch(rbfiles, 'main.rb') #=> false
* File.fnmatch(rbfiles, './main.rb') #=> false
* File.fnmatch(rbfiles, 'lib/song.rb') #=> true
@@ -1754,14 +1754,16 @@
* File.fnmatch('**.rb', 'lib/song.rb') #=> true
* File.fnmatch('*', 'dave/.profile') #=> true
*
- * File.fnmatch('* IGNORE /*', 'dave/.profile', File::FNM_PATHNAME) #=> false
- * File.fnmatch('* IGNORE /*', 'dave/.profile', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true
+ * pattern = '*' '/' '*'
+ * File.fnmatch(pattern, 'dave/.profile', File::FNM_PATHNAME) #=> false
+ * File.fnmatch(pattern, 'dave/.profile', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true
*
- * File.fnmatch('** IGNORE /foo', 'a/b/c/foo', File::FNM_PATHNAME) #=> true
- * File.fnmatch('** IGNORE /foo', '/a/b/c/foo', File::FNM_PATHNAME) #=> true
- * File.fnmatch('** IGNORE /foo', 'c:/a/b/c/foo', File::FNM_PATHNAME) #=> true
- * File.fnmatch('** IGNORE /foo', 'a/.b/c/foo', File::FNM_PATHNAME) #=> false
- * File.fnmatch('** IGNORE /foo', 'a/.b/c/foo', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true
+ * pattern = '**' '/' 'foo'
+ * File.fnmatch(pattern, 'a/b/c/foo', File::FNM_PATHNAME) #=> true
+ * File.fnmatch(pattern, '/a/b/c/foo', File::FNM_PATHNAME) #=> true
+ * File.fnmatch(pattern, 'c:/a/b/c/foo', File::FNM_PATHNAME) #=> true
+ * File.fnmatch(pattern, 'a/.b/c/foo', File::FNM_PATHNAME) #=> false
+ * File.fnmatch(pattern, 'a/.b/c/foo', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true
*/
static VALUE
file_s_fnmatch(int argc, VALUE *argv, VALUE obj)
Modified: trunk/enumerator.c
===================================================================
--- trunk/enumerator.c 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/enumerator.c 2006-04-13 08:18:48 UTC (rev 490)
@@ -2,13 +2,13 @@
enumerator.c - provides Enumerator class
- $Author: matz $
+ $Author: nobu $
Copyright (C) 2001-2003 Akinori MUSHA
$Idaemons: /home/cvs/rb/enumerator/enumerator.c,v 1.1.1.1 2001/07/15 10:12:48 knu Exp $
$RoughId: enumerator.c,v 1.6 2003/07/27 11:03:24 nobu Exp $
- $Id: enumerator.c,v 1.10 2006/02/03 09:15:42 matz Exp $
+ $Id: enumerator.c,v 1.12 2006/03/31 05:25:00 nobu Exp $
************************************************/
@@ -319,10 +319,12 @@
static VALUE
enumerator_each(VALUE obj)
{
- struct enumerator *e = enumerator_ptr(obj);
+ struct enumerator *e;
int argc = 0;
VALUE *argv = 0;
+ if (!rb_block_given_p()) return obj;
+ e = enumerator_ptr(obj);
if (e->args) {
argc = RARRAY(e->args)->len;
argv = RARRAY(e->args)->ptr;
Modified: trunk/ext/.document
===================================================================
--- trunk/ext/.document 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ext/.document 2006-04-13 08:18:48 UTC (rev 490)
@@ -1,6 +1,8 @@
# Add files to this as they become documented
iconv/iconv.c
+nkf/lib/kconv.rb
+nkf/nkf.c
stringio/stringio.c
strscan/strscan.c
zlib/zlib.c
Modified: trunk/ext/extmk.rb
===================================================================
--- trunk/ext/extmk.rb 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ext/extmk.rb 2006-04-13 08:18:48 UTC (rev 490)
@@ -64,7 +64,7 @@
unless installrb.empty?
config = CONFIG.dup
install_dirs(target_prefix).each {|var, val| config[var] = val}
- FileUtils.rm_f(installrb.values.collect {|f| Config.expand(f, config)}, verbose: true)
+ FileUtils.rm_f(installrb.values.collect {|f| RbConfig.expand(f, config)}, verbose: true)
end
end
return false
@@ -116,9 +116,9 @@
makefile = "./Makefile"
ok = File.exist?(makefile)
unless $ignore
- Config::CONFIG["hdrdir"] = $hdrdir
- Config::CONFIG["srcdir"] = $srcdir
- Config::CONFIG["topdir"] = $topdir
+ RbConfig::CONFIG["hdrdir"] = $hdrdir
+ RbConfig::CONFIG["srcdir"] = $srcdir
+ RbConfig::CONFIG["topdir"] = $topdir
CONFIG["hdrdir"] = ($hdrdir == top_srcdir) ? top_srcdir : "$(topdir)"+top_srcdir[2..-1]
CONFIG["srcdir"] = "$(hdrdir)/ext/#{$mdir}"
CONFIG["topdir"] = $topdir
@@ -183,8 +183,8 @@
$extpath |= $LIBPATH
end
ensure
- Config::CONFIG["srcdir"] = $top_srcdir
- Config::CONFIG["topdir"] = topdir
+ RbConfig::CONFIG["srcdir"] = $top_srcdir
+ RbConfig::CONFIG["topdir"] = topdir
CONFIG["srcdir"] = mk_srcdir
CONFIG["topdir"] = mk_topdir
CONFIG.delete("hdrdir")
@@ -378,10 +378,10 @@
end
if $extout
- Config.expand(extout = "#$extout", Config::CONFIG.merge("topdir"=>$topdir))
+ RbConfig.expand(extout = "#$extout", RbConfig::CONFIG.merge("topdir"=>$topdir))
if $install
- Config.expand(dest = "#{$destdir}#{$rubylibdir}")
- FileUtils.cp_r(extout+"/.", dest, :verbose => true, :noop => $dryrun)
+ RbConfig.expand(dest = "#{$destdir}#{$rubylibdir}")
+ FileUtils.cp_r(extout+"/.", dest, :remove_destination => true, :verbose => true, :noop => $dryrun)
exit
end
unless $ignore
@@ -472,7 +472,7 @@
$mflags.defined?("DESTDIR") or $mflags << "DESTDIR=#{$destdir}"
end
if !$extlist.empty? and $extupdate
- rm_f(Config::CONFIG["LIBRUBY_SO"])
+ rm_f(RbConfig::CONFIG["LIBRUBY_SO"])
end
puts "making #{rubies.join(', ')}"
$stdout.flush
Modified: trunk/ext/nkf/lib/kconv.rb
===================================================================
--- trunk/ext/nkf/lib/kconv.rb 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ext/nkf/lib/kconv.rb 2006-04-13 08:18:48 UTC (rev 490)
@@ -1,7 +1,7 @@
#
# kconv.rb - Kanji Converter.
#
-# $Id: kconv.rb,v 1.13 2005/10/08 10:45:51 nobu Exp $
+# $Id: kconv.rb,v 1.14 2006/03/27 13:48:20 naruse Exp $
#
require 'nkf'
@@ -12,35 +12,54 @@
#
#Constant of Encoding
- AUTO = ::NKF::AUTO
- JIS = ::NKF::JIS
- EUC = ::NKF::EUC
- SJIS = ::NKF::SJIS
- BINARY = ::NKF::BINARY
- NOCONV = ::NKF::NOCONV
- ASCII = ::NKF::ASCII
- UTF8 = ::NKF::UTF8
- UTF16 = ::NKF::UTF16
- UTF32 = ::NKF::UTF32
- UNKNOWN = ::NKF::UNKNOWN
+ # Auto-Detect
+ AUTO = NKF::AUTO
+ # ISO-2022-JP
+ JIS = NKF::JIS
+ # EUC-JP
+ EUC = NKF::EUC
+ # Shift_JIS
+ SJIS = NKF::SJIS
+ # BINARY
+ BINARY = NKF::BINARY
+ # NOCONV
+ NOCONV = NKF::NOCONV
+ # ASCII
+ ASCII = NKF::ASCII
+ # UTF-8
+ UTF8 = NKF::UTF8
+ # UTF-16
+ UTF16 = NKF::UTF16
+ # UTF-32
+ UTF32 = NKF::UTF32
+ # UNKNOWN
+ UNKNOWN = NKF::UNKNOWN
+
#
# Private Constants
#
- REVISION = %q$Revision: 1.13 $
+ # Revision of kconv.rb
+ REVISION = %q$Revision: 1.14 $
#Regexp of Encoding
+
+ # Regexp of Shift_JIS string (private constant)
RegexpShiftjis = /\A(?:
[\x00-\x7f\xa1-\xdf] |
[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
)*\z/nx
+
+ # Regexp of EUC-JP string (private constant)
RegexpEucjp = /\A(?:
[\x00-\x7f] |
\x8e [\xa1-\xdf] |
\x8f [\xa1-\xdf] [\xa1-\xfe] |
[\xa1-\xdf] [\xa1-\xfe]
)*\z/nx
+
+ # Regexp of UTF-8 string (private constant)
RegexpUtf8 = /\A(?:
[\x00-\x7f] |
[\xc2-\xdf] [\x80-\xbf] |
@@ -50,227 +69,280 @@
[\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
\xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
)*\z/nx
-
- # SYMBOL_TO_OPTION is the table for Kconv#conv
- # Kconv#conv is intended to generic convertion method,
- # so this table specifies symbols which can be supported not only nkf...
- SYMBOL_TO_OPTION = {
- :iso2022jp => '-j',
- :jis => '-j',
- :eucjp => '-e',
- :euc => '-e',
- :eucjpms => '-e --cp932',
- :shiftjis => '-s',
- :sjis => '-s',
- :cp932 => '-s --cp932',
- :windows31j => '-s --cp932',
- :utf8 => '-w',
- :utf8bom => '-w8',
- :utf8n => '-w80',
- :utf8mac => '-w --utf8mac-input',
- :utf16 => '-w16',
- :utf16be => '-w16B',
- :utf16ben => '-w16B0',
- :utf16le => '-w16L',
- :utf16len => '-w16L0',
- :lf => '-Lu', # LF
- :cr => '-Lm', # CR
- :crlf => '-Lw', # CRLF
- }
-
- CONSTANT_TO_SYMBOL = {
- JIS => :iso2022jp,
- EUC => :eucjp,
- SJIS => :shiftjis,
- BINARY => :binary,
- NOCONV => :noconv,
- ASCII => :ascii,
- UTF8 => :utf8,
- UTF16 => :utf16,
- UTF32 => :utf32,
- UNKNOWN => :unknown
- }
-
+
#
# Public Methods
#
+ # call-seq:
+ # Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
#
- # Kconv.conv( str, :to => :"euc-jp", :from => :shift_jis, :opt => [:hiragana, :katakana] )
+ # Convert <code>str</code> to out_code.
+ # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
#
- def conv(str, *args)
- option = nil
- if args[0].is_a? Hash
- option = [
- args[0][:to]||args[0]['to'],
- args[0][:from]||args[0]['from'],
- args[0][:opt]||args[0]['opt'] ]
- elsif args[0].is_a? String or args[0].is_a? Symbol or args[0].is_a? Integer
- option = args
- else
+ # *Note*
+ # This method decode MIME encoded string and
+ # convert halfwidth katakana to fullwidth katakana.
+ # If you don't want to decode them, use NKF.nkf.
+ def kconv(str, out_code, in_code = AUTO)
+ opt = '-'
+ case in_code
+ when ::NKF::JIS
+ opt << 'J'
+ when ::NKF::EUC
+ opt << 'E'
+ when ::NKF::SJIS
+ opt << 'S'
+ when ::NKF::UTF8
+ opt << 'W'
+ when ::NKF::UTF16
+ opt << 'W16'
+ end
+
+ case out_code
+ when ::NKF::JIS
+ opt << 'j'
+ when ::NKF::EUC
+ opt << 'e'
+ when ::NKF::SJIS
+ opt << 's'
+ when ::NKF::UTF8
+ opt << 'w'
+ when ::NKF::UTF16
+ opt << 'w16'
+ when ::NKF::NOCONV
return str
end
-
- to = symbol_to_option(option[0])
- from = symbol_to_option(option[1]).to_s.sub(/(-[jesw])/o){$1.upcase}
- opt = option[2..-1] and opt = opt.flatten.map{|x|symbol_to_option(x)}.compact.join(' ')
-
- nkf_opt = '-x -m0 %s %s %s' % [to, from, opt]
- result = ::NKF::nkf( nkf_opt, str)
+
+ opt = '' if opt == '-'
+
+ ::NKF::nkf(opt, str)
end
- alias :kconv :conv
+ module_function :kconv
#
# Encode to
#
+ # call-seq:
+ # Kconv.tojis(str) -> string
+ #
+ # Convert <code>str</code> to ISO-2022-JP
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-jxm0', str).
def tojis(str)
- ::NKF::nkf('-j', str)
+ ::NKF::nkf('-jm0', str)
end
+ module_function :tojis
+ # call-seq:
+ # Kconv.toeuc(str) -> string
+ #
+ # Convert <code>str</code> to EUC-JP
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-exm0', str).
def toeuc(str)
- ::NKF::nkf('-e', str)
+ ::NKF::nkf('-em0', str)
end
+ module_function :toeuc
+ # call-seq:
+ # Kconv.tosjis(str) -> string
+ #
+ # Convert <code>str</code> to Shift_JIS
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-sxm0', str).
def tosjis(str)
- ::NKF::nkf('-s', str)
+ ::NKF::nkf('-sm0', str)
end
+ module_function :tosjis
+ # call-seq:
+ # Kconv.toutf8(str) -> string
+ #
+ # Convert <code>str</code> to UTF-8
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-wxm0', str).
def toutf8(str)
- ::NKF::nkf('-w', str)
+ ::NKF::nkf('-wm0', str)
end
+ module_function :toutf8
+ # call-seq:
+ # Kconv.toutf16(str) -> string
+ #
+ # Convert <code>str</code> to UTF-16
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-w16xm0', str).
def toutf16(str)
- ::NKF::nkf('-w16', str)
+ ::NKF::nkf('-w16m0', str)
end
+ module_function :toutf16
- alias :to_jis :tojis
- alias :to_euc :toeuc
- alias :to_eucjp :toeuc
- alias :to_sjis :tosjis
- alias :to_shiftjis :tosjis
- alias :to_iso2022jp :tojis
- alias :to_utf8 :toutf8
- alias :to_utf16 :toutf16
-
#
# guess
#
+ # call-seq:
+ # Kconv.guess(str) -> integer
+ #
+ # Guess input encoding by NKF.guess2
def guess(str)
::NKF::guess(str)
end
+ module_function :guess
+ # call-seq:
+ # Kconv.guess_old(str) -> integer
+ #
+ # Guess input encoding by NKF.guess1
def guess_old(str)
::NKF::guess1(str)
end
+ module_function :guess_old
- def guess_as_symbol(str)
- CONSTANT_TO_SYMBOL[guess(str)]
- end
-
#
# isEncoding
#
+ # call-seq:
+ # Kconv.iseuc(str) -> obj or nil
+ #
+ # Returns whether input encoding is EUC-JP or not.
+ #
+ # *Note* don't expect this return value is MatchData.
def iseuc(str)
RegexpEucjp.match( str )
end
-
+ module_function :iseuc
+
+ # call-seq:
+ # Kconv.issjis(str) -> obj or nil
+ #
+ # Returns whether input encoding is Shift_JIS or not.
+ #
+ # *Note* don't expect this return value is MatchData.
def issjis(str)
RegexpShiftjis.match( str )
end
+ module_function :issjis
+ # call-seq:
+ # Kconv.isutf8(str) -> obj or nil
+ #
+ # Returns whether input encoding is UTF-8 or not.
+ #
+ # *Note* don't expect this return value is MatchData.
def isutf8(str)
RegexpUtf8.match( str )
end
+ module_function :isutf8
+end
+
+class String
+ # call-seq:
+ # String#kconv(out_code, in_code = Kconv::AUTO)
#
- # encoding?
+ # Convert <code>self</code> to out_code.
+ # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
#
-
- def eucjp?(str)
- RegexpEucjp.match( str ) ? true : false
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want to decode them, use NKF.nkf.
+ def kconv(out_code, in_code=Kconv::AUTO)
+ Kconv::kconv(self, out_code, in_code)
end
+
+ #
+ # to Encoding
+ #
+
+ # call-seq:
+ # String#tojis -> string
+ #
+ # Convert <code>self</code> to ISO-2022-JP
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-jxm0', str).
+ def tojis; Kconv.tojis(self) end
- def shiftjis?(str)
- RegexpShiftjis.match( str ) ? true : false
- end
+ # call-seq:
+ # String#toeuc -> string
+ #
+ # Convert <code>self</code> to EUC-JP
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-exm0', str).
+ def toeuc; Kconv.toeuc(self) end
- def utf8?(str)
- RegexpUtf8.match( str ) ? true : false
- end
+ # call-seq:
+ # String#tosjis -> string
+ #
+ # Convert <code>self</code> to Shift_JIS
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-sxm0', str).
+ def tosjis; Kconv.tosjis(self) end
- alias :euc? :eucjp?
- alias :sjis? :shiftjis?
+ # call-seq:
+ # String#toutf8 -> string
+ #
+ # Convert <code>self</code> to UTF-8
+ #
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-wxm0', str).
+ def toutf8; Kconv.toutf8(self) end
+ # call-seq:
+ # String#toutf16 -> string
#
- # Private Methods
+ # Convert <code>self</code> to UTF-16
#
- def symbol_to_option(symbol)
- if symbol.is_a? Integer
- symbol = CONSTANT_TO_SYMBOL[symbol]
- elsif symbol.to_s[0] == ?-
- return symbol.to_s
- end
- begin
- SYMBOL_TO_OPTION[ symbol.to_s.downcase.delete('-_').to_sym ]
- rescue
- return nil
- end
- end
+ # *Note*
+ # This method convert halfwidth katakana to fullwidth katakana.
+ # If you don't want it, use NKF.nkf('-w16xm0', str).
+ def toutf16; Kconv.toutf16(self) end
#
- # Make them module functions
+ # is Encoding
#
- module_function(*instance_methods(false))
- private_class_method :symbol_to_option
-end
+ # call-seq:
+ # String#iseuc -> obj or nil
+ #
+ # Returns whether <code>self</code>'s encoding is EUC-JP or not.
+ #
+ # *Note* don't expect this return value is MatchData.
+ def iseuc; Kconv.iseuc(self) end
-class String
- def kconv(*args)
- Kconv::kconv(self, *args)
- end
-
- def conv(*args)
- Kconv::conv(self, *args)
- end
-
- # to Encoding
- def tojis
- ::NKF::nkf('-j', self)
- end
- def toeuc
- ::NKF::nkf('-e', self)
- end
- def tosjis
- ::NKF::nkf('-s', self)
- end
- def toutf8
- ::NKF::nkf('-w', self)
- end
- def toutf16
- ::NKF::nkf('-w16', self)
- end
- alias :to_jis :tojis
- alias :to_euc :toeuc
- alias :to_eucjp :toeuc
- alias :to_sjis :tosjis
- alias :to_shiftjis :tosjis
- alias :to_iso2022jp :tojis
- alias :to_utf8 :toutf8
- alias :to_utf16 :toutf16
-
- # is Encoding
- def iseuc; Kconv.iseuc( self ) end
- def issjis; Kconv.issjis( self ) end
- def isutf8; Kconv.isutf8( self ) end
- def eucjp?; Kconv.eucjp?( self ) end
- def shiftjis?;Kconv.shiftjis?( self ) end
- def utf8?; Kconv.utf8?( self ) end
- alias :euc? :eucjp?
- alias :sjis? :shiftjis?
-
- def guess_as_symbol; Kconv.guess_as_symbol( self ) end
+ # call-seq:
+ # String#issjis -> obj or nil
+ #
+ # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
+ #
+ # *Note* don't expect this return value is MatchData.
+ def issjis; Kconv.issjis(self) end
+
+ # call-seq:
+ # String#isutf8 -> obj or nil
+ #
+ # Returns whether <code>self</code>'s encoding is UTF-8 or not.
+ #
+ # *Note* don't expect this return value is MatchData.
+ def isutf8; Kconv.isutf8(self) end
end
Modified: trunk/ext/nkf/nkf-utf8/nkf.c
===================================================================
--- trunk/ext/nkf/nkf-utf8/nkf.c 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ext/nkf/nkf-utf8/nkf.c 2006-04-13 08:18:48 UTC (rev 490)
@@ -39,13 +39,14 @@
** E-Mail: furukawa tcp-ip.or.jp
** まで御連絡をお願いします。
***********************************************************************/
-/* $Id: nkf.c,v 1.17 2005/07/21 16:13:07 naruse Exp $ */
-#define NKF_VERSION "2.0.5"
-#define NKF_RELEASE_DATE "2005-07-22"
+/* $Id: nkf.c,v 1.19 2006/03/29 01:12:22 usa Exp $ */
+#define NKF_VERSION "2.0.6"
+#define NKF_RELEASE_DATE "2006-03-26"
#include "config.h"
#define COPY_RIGHT \
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
+ " 2002-2006 Kono, Furukawa, Naruse, mastodon"
/*
@@ -190,7 +191,9 @@
#define X0201 2
#define ISO8859_1 8
#define NO_X0201 3
-#define X0212 16
+#define X0212 0x2844
+#define X0213_1 0x284F
+#define X0213_2 0x2850
/* Input Assumption */
@@ -232,6 +235,17 @@
#define is_alnum(c) \
(('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
+/* I don't trust portablity of toupper */
+#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
+#define nkf_isoctal(c) ('0'<=c && c<='7')
+#define nkf_isdigit(c) ('0'<=c && c<='9')
+#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
+#define nkf_isblank(c) (c == SPACE || c == TAB)
+#define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
+#define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
+#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
+#define hex2bin(x) ( nkf_isdigit(x) ? x - '0' : nkf_toupper(x) - 'A' + 10)
+
#define HOLD_SIZE 1024
#define IOBUF_SIZE 16384
@@ -247,13 +261,12 @@
#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
-#define sizeof_euc_utf8 94
#define sizeof_euc_to_utf8_1byte 94
#define sizeof_euc_to_utf8_2bytes 94
#define sizeof_utf8_to_euc_C2 64
#define sizeof_utf8_to_euc_E5B8 64
#define sizeof_utf8_to_euc_2bytes 112
-#define sizeof_utf8_to_euc_3bytes 112
+#define sizeof_utf8_to_euc_3bytes 16
#endif
/* MIME preprocessor */
@@ -300,14 +313,41 @@
STATIC int s_iconv PROTO((int c2,int c1,int c0));
STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
STATIC int e_iconv PROTO((int c2,int c1,int c0));
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+/* UCS Mapping
+ * 0: Shift_JIS, eucJP-ascii
+ * 1: eucJP-ms
+ * 2: CP932, CP51932
+ */
+#define UCS_MAP_ASCII 0
+#define UCS_MAP_MS 1
+#define UCS_MAP_CP932 2
+STATIC int ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
#ifdef UTF8_INPUT_ENABLE
+/* no NEC special, NEC-selected IBM extended and IBM extended characters */
+STATIC int no_cp932ext_f = FALSE;
+/* ignore ZERO WIDTH NO-BREAK SPACE */
+STATIC int ignore_zwnbsp_f = TRUE;
+STATIC int no_best_fit_chars_f = FALSE;
+STATIC int unicode_subchar = '?'; /* the regular substitution character */
+STATIC void encode_fallback_html PROTO((int c));
+STATIC void encode_fallback_xml PROTO((int c));
+STATIC void encode_fallback_java PROTO((int c));
+STATIC void encode_fallback_perl PROTO((int c));
+STATIC void encode_fallback_subchar PROTO((int c));
+STATIC void (*encode_fallback)PROTO((int c)) = NULL;
STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
STATIC int w_iconv PROTO((int c2,int c1,int c0));
STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
+STATIC int unicode_to_jis_common PROTO((int c2,int c1,int c0,int *p2,int *p1));
STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
STATIC int ww16_conv PROTO((int c2, int c1, int c0));
+STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
#endif
#ifdef UTF8_OUTPUT_ENABLE
+STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
+STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
STATIC int e2w_conv PROTO((int c2,int c1));
STATIC void w_oconv PROTO((int c2,int c1));
STATIC void w_oconv16 PROTO((int c2,int c1));
@@ -400,14 +440,6 @@
STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
#endif
STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
-STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
-STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
-STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
-STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
-#endif
#ifdef UNICODE_NORMALIZATION
STATIC int nfc_f = FALSE;
@@ -462,10 +494,12 @@
#endif
#ifdef SHIFTJIS_CP932
-STATIC int cp932_f = TRUE;
+/* invert IBM extended characters to others */
+STATIC int cp51932_f = TRUE;
#define CP932_TABLE_BEGIN (0xfa)
#define CP932_TABLE_END (0xfc)
+/* invert NEC-selected IBM extended characters to IBM extended characters */
STATIC int cp932inv_f = TRUE;
#define CP932INV_TABLE_BEGIN (0xed)
#define CP932INV_TABLE_END (0xee)
@@ -478,6 +512,7 @@
STATIC int x0212_shift PROTO((int c));
STATIC int x0212_unshift PROTO((int c));
#endif
+STATIC int x0213_f = FALSE;
STATIC unsigned char prefix_table[256];
@@ -670,9 +705,13 @@
#define CRLF 1
-STATIC int file_out = FALSE;
+STATIC int file_out_f = FALSE;
#ifdef OVERWRITE
-STATIC int overwrite = FALSE;
+STATIC int overwrite_f = FALSE;
+STATIC int preserve_time_f = FALSE;
+STATIC int backup_f = FALSE;
+STATIC char *backup_suffix = "";
+STATIC char *get_backup_filename PROTO((const char *suffix, const char *filename));
#endif
STATIC int crmode_f = 0; /* CR, NL, CRLF */
@@ -783,9 +822,9 @@
#endif
/* reopen file for stdout */
- if (file_out == TRUE) {
+ if (file_out_f == TRUE) {
#ifdef OVERWRITE
- if (overwrite){
+ if (overwrite_f){
outfname = malloc(strlen(origfname)
+ strlen(".nkftmpXXXXXX")
+ 1);
@@ -807,7 +846,7 @@
}
strcat(outfname, "ntXXXXXX");
mktemp(outfname);
- fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
+ fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
S_IREAD | S_IWRITE);
#else
strcat(outfname, ".nkftmpXXXXXX");
@@ -860,7 +899,7 @@
}
fclose(fin);
#ifdef OVERWRITE
- if (overwrite) {
+ if (overwrite_f) {
struct stat sb;
#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
time_t tb[2];
@@ -882,23 +921,37 @@
}
/* タイムスタンプを復元 */
+ if(preserve_time_f){
#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
- tb[0] = tb[1] = sb.st_mtime;
- if (utime(outfname, tb)) {
- fprintf(stderr, "Can't set timestamp %s\n", outfname);
- }
+ tb[0] = tb[1] = sb.st_mtime;
+ if (utime(outfname, tb)) {
+ fprintf(stderr, "Can't set timestamp %s\n", outfname);
+ }
#else
- tb.actime = sb.st_atime;
- tb.modtime = sb.st_mtime;
- if (utime(outfname, &tb)) {
- fprintf(stderr, "Can't set timestamp %s\n", outfname);
- }
+ tb.actime = sb.st_atime;
+ tb.modtime = sb.st_mtime;
+ if (utime(outfname, &tb)) {
+ fprintf(stderr, "Can't set timestamp %s\n", outfname);
+ }
#endif
+ }
+ if(backup_f){
+ char *backup_filename = get_backup_filename(backup_suffix, origfname);
#ifdef MSDOS
- if (unlink(origfname)){
- perror(origfname);
- }
+ unlink(backup_filename);
#endif
+ if (rename(origfname, backup_filename)) {
+ perror(backup_filename);
+ fprintf(stderr, "Can't rename %s to %s\n",
+ origfname, backup_filename);
+ }
+ }else{
+#ifdef MSDOS
+ if (unlink(origfname)){
+ perror(origfname);
+ }
+#endif
+ }
if (rename(outfname, origfname)) {
perror(origfname);
fprintf(stderr, "Can't rename %s to %s\n",
@@ -911,23 +964,68 @@
}
}
#ifdef EASYWIN /*Easy Win */
- if (file_out == FALSE)
+ if (file_out_f == FALSE)
scanf("%d",&end_check);
else
fclose(stdout);
#else /* for Other OS */
- if (file_out == TRUE)
+ if (file_out_f == TRUE)
fclose(stdout);
#endif /*Easy Win */
return (0);
}
#endif /* WIN32DLL */
+#ifdef OVERWRITE
+/*
+ * Build the name of the backup file for `filename`.
+ *
+ * If `suffix` contains one or more '*' characters, the result is `suffix`
+ * with every '*' replaced by `filename`.  Otherwise the result is
+ * `filename` followed by `suffix`.
+ *
+ * Returns a malloc'ed, NUL-terminated string that the caller owns, or
+ * NULL (after reporting with perror) when the allocation fails.
+ */
+char *get_backup_filename(suffix, filename)
+    const char *suffix;
+    const char *filename;
+{
+    char *backup_filename = NULL;
+    int asterisk_count = 0;
+    int i, j;
+    int suffix_length = strlen(suffix);
+    int filename_length = strlen(filename);
+
+    for(i = 0; suffix[i]; i++){
+        if(suffix[i] == '*') asterisk_count++;
+    }
+
+    /* Length of the result: every '*' (one char) grows to a whole
+     * filename; with no '*' the suffix is simply appended. */
+    if(asterisk_count){
+        j = suffix_length + asterisk_count * (filename_length - 1);
+    }else{
+        j = suffix_length + filename_length;
+    }
+
+    /* The no-asterisk path used to allocate a single byte here. */
+    backup_filename = malloc(j + 1);
+    if (!backup_filename){
+        perror("Can't malloc backup filename.");
+        return NULL;
+    }
+
+    if(asterisk_count){
+        for(i = 0, j = 0; suffix[i]; i++){
+            if(suffix[i] == '*'){
+                memcpy(backup_filename + j, filename, filename_length);
+                j += filename_length;
+            }else{
+                backup_filename[j++] = suffix[i];
+            }
+        }
+        backup_filename[j] = '\0';
+    }else{
+        strcpy(backup_filename, filename);
+        strcat(backup_filename, suffix);
+    }
+    return backup_filename;
+}
+#endif
+
STATIC const
struct {
const char *name;
const char *alias;
} long_option[] = {
+ {"ic=", ""},
+ {"oc=", ""},
{"base64","jMB"},
{"euc","e"},
{"euc-input","E"},
@@ -953,23 +1051,32 @@
#ifdef X0212_ENABLE
{"x0212", ""},
#endif
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- {"internal-unicode", ""},
-#endif
#ifdef UTF8_OUTPUT_ENABLE
{"utf8", "w"},
{"utf16", "w16"},
{"ms-ucs-map", ""},
+ {"fb-skip", ""},
+ {"fb-html", ""},
+ {"fb-xml", ""},
+ {"fb-perl", ""},
+ {"fb-java", ""},
+ {"fb-subchar", ""},
+ {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
{"utf8-input", "W"},
{"utf16-input", "W16"},
+ {"no-cp932ext", ""},
+ {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
{"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
{"overwrite", ""},
+ {"overwrite=", ""},
+ {"in-place", ""},
+ {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
{"cap-input", ""},
@@ -998,40 +1105,328 @@
options(cp)
unsigned char *cp;
{
- int i;
+ int i, j;
unsigned char *p = NULL;
+ unsigned char *cp_back = NULL;
+ unsigned char codeset[32];
if (option_mode==1)
return;
while(*cp && *cp++!='-');
- while (*cp) {
+ while (*cp || cp_back) {
+ if(!*cp){
+ cp = cp_back;
+ cp_back = NULL;
+ continue;
+ }
p = 0;
switch (*cp++) {
case '-': /* literal options */
- if (!*cp) { /* ignore the rest of arguments */
+ if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
option_mode = 1;
return;
}
for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
- int j;
p = (unsigned char *)long_option[i].name;
for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
if (*p == cp[j] || cp[j] == ' '){
- p = &cp[j];
+ p = &cp[j] + 1;
break;
}
p = 0;
}
if (p == 0) return;
- cp = (unsigned char *)long_option[i].alias;
- if (!*cp){
- cp = p;
+ while(*cp && *cp != SPACE && cp++);
+ if (long_option[i].alias[0]){
+ cp_back = cp;
+ cp = (unsigned char *)long_option[i].alias;
+ }else{
+ if (strcmp(long_option[i].name, "ic=") == 0){
+ for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
+ codeset[i] = nkf_toupper(p[i]);
+ }
+ codeset[i] = 0;
+ if(strcmp(codeset, "ISO-2022-JP") == 0 ||
+ strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
+ strcmp(codeset, "CP50220") == 0 ||
+ strcmp(codeset, "CP50221") == 0 ||
+ strcmp(codeset, "CP50222") == 0 ||
+ strcmp(codeset, "ISO-2022-JP-MS") == 0){
+ input_f = JIS_INPUT;
+ }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
+ input_f = JIS_INPUT;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
+ input_f = JIS_INPUT;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+ x0213_f = TRUE;
+ }else if(strcmp(codeset, "SHIFT_JIS") == 0){
+ input_f = SJIS_INPUT;
+ if (x0201_f==NO_X0201) x0201_f=TRUE;
+ }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
+ strcmp(codeset, "CSWINDOWS31J") == 0 ||
+ strcmp(codeset, "CP932") == 0 ||
+ strcmp(codeset, "MS932") == 0){
+ input_f = SJIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ }else if(strcmp(codeset, "EUCJP") == 0 ||
+ strcmp(codeset, "EUC-JP") == 0){
+ input_f = JIS_INPUT;
+ }else if(strcmp(codeset, "CP51932") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
+ strcmp(codeset, "EUCJP-MS") == 0 ||
+ strcmp(codeset, "EUCJPMS") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_MS;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
+ strcmp(codeset, "EUCJP-ASCII") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
+ strcmp(codeset, "SHIFT_JIS-2004") == 0){
+ input_f = SJIS_INPUT;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
+#endif
+ if (x0201_f==NO_X0201) x0201_f=TRUE;
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
+ strcmp(codeset, "EUC-JIS-2004") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_INPUT_ENABLE
+ }else if(strcmp(codeset, "UTF-8") == 0 ||
+ strcmp(codeset, "UTF-8N") == 0 ||
+ strcmp(codeset, "UTF-8-BOM") == 0){
+ input_f = UTF8_INPUT;
+#ifdef UNICODE_NORMALIZATION
+ }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
+ strcmp(codeset, "UTF-8-MAC") == 0){
+ input_f = UTF8_INPUT;
+ nfc_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "UTF-16") == 0){
+ input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
+ }else if(strcmp(codeset, "UTF-16BE") == 0 ||
+ strcmp(codeset, "UTF-16BE-BOM") == 0){
+ input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
+ }else if(strcmp(codeset, "UTF-16LE") == 0 ||
+ strcmp(codeset, "UTF-16LE-BOM") == 0){
+ input_f = UTF16LE_INPUT;
+ utf16_mode = UTF16LE_INPUT;
+#endif
+ }
+ continue;
+ }
+ if (strcmp(long_option[i].name, "oc=") == 0){
+ for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
+ codeset[i] = nkf_toupper(p[i]);
+ }
+ codeset[i] = 0;
+ if(strcmp(codeset, "ISO-2022-JP") == 0 ||
+ strcmp(codeset, "CP50220") == 0){
+ output_conv = j_oconv;
+ }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
+ output_conv = j_oconv;
+ no_cp932ext_f = TRUE;
+ }else if(strcmp(codeset, "CP50221") == 0 ||
+ strcmp(codeset, "ISO-2022-JP-MS") == 0){
+ output_conv = j_oconv;
+ x0201_f = FALSE;
+ }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
+ output_conv = j_oconv;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+ }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
+ output_conv = j_oconv;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+ }else if(strcmp(codeset, "ISO-2022-JP-MS") == 0){
+ output_conv = j_oconv;
+ x0201_f = FALSE;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+ }else if(strcmp(codeset, "SHIFT_JIS") == 0){
+ output_conv = s_oconv;
+ }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
+ strcmp(codeset, "CSWINDOWS31J") == 0 ||
+ strcmp(codeset, "CP932") == 0 ||
+ strcmp(codeset, "MS932") == 0){
+ output_conv = s_oconv;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ }else if(strcmp(codeset, "EUCJP") == 0 ||
+ strcmp(codeset, "EUC-JP") == 0){
+ output_conv = e_oconv;
+ }else if(strcmp(codeset, "CP51932") == 0){
+ output_conv = e_oconv;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
+ strcmp(codeset, "EUCJP-MS") == 0 ||
+ strcmp(codeset, "EUCJPMS") == 0){
+ output_conv = e_oconv;
+ x0201_f = FALSE;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_MS;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
+ strcmp(codeset, "EUCJP-ASCII") == 0){
+ output_conv = e_oconv;
+ x0201_f = FALSE;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
+ strcmp(codeset, "SHIFT_JIS-2004") == 0){
+ output_conv = s_oconv;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp932inv_f = FALSE;
+#endif
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
+ strcmp(codeset, "EUC-JIS-2004") == 0){
+ output_conv = e_oconv;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ }else if(strcmp(codeset, "UTF-8") == 0){
+ output_conv = w_oconv;
+ }else if(strcmp(codeset, "UTF-8N") == 0){
+ output_conv = w_oconv;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-8-BOM") == 0){
+ output_conv = w_oconv;
+ unicode_bom_f=2;
+ }else if(strcmp(codeset, "UTF-16BE") == 0){
+ output_conv = w_oconv16;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-16") == 0 ||
+ strcmp(codeset, "UTF-16BE-BOM") == 0){
+ output_conv = w_oconv16;
+ unicode_bom_f=2;
+ }else if(strcmp(codeset, "UTF-16LE") == 0){
+ output_conv = w_oconv16;
+ w_oconv16_LE = 1;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
+ output_conv = w_oconv16;
+ w_oconv16_LE = 1;
+ unicode_bom_f=2;
+#endif
+ }
+ continue;
+ }
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
- file_out = TRUE;
- overwrite = TRUE;
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
continue;
}
+ if (strcmp(long_option[i].name, "overwrite=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen(p) + 1);
+ strcpy(backup_suffix, p);
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen(p) + 1);
+ strcpy(backup_suffix, p);
+ continue;
+ }
#endif
#ifdef INPUT_OPTION
if (strcmp(long_option[i].name, "cap-input") == 0){
@@ -1061,21 +1456,21 @@
#endif
if (strcmp(long_option[i].name, "cp932") == 0){
#ifdef SHIFTJIS_CP932
- cp932_f = TRUE;
+ cp51932_f = TRUE;
cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
continue;
}
if (strcmp(long_option[i].name, "no-cp932") == 0){
#ifdef SHIFTJIS_CP932
- cp932_f = FALSE;
+ cp51932_f = FALSE;
cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = FALSE;
+ ms_ucs_map_f = UCS_MAP_ASCII;
#endif
continue;
}
@@ -1104,14 +1499,68 @@
}
#endif
#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- if (strcmp(long_option[i].name, "internal-unicode") == 0){
- internal_unicode_f = TRUE;
+ if (strcmp(long_option[i].name, "no-cp932ext") == 0){
+ no_cp932ext_f = TRUE;
continue;
}
+ if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
+ no_best_fit_chars_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-skip") == 0){
+ encode_fallback = NULL;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-html") == 0){
+ encode_fallback = encode_fallback_html;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-xml" ) == 0){
+ encode_fallback = encode_fallback_xml;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-java") == 0){
+ encode_fallback = encode_fallback_java;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-perl") == 0){
+ encode_fallback = encode_fallback_perl;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar") == 0){
+ encode_fallback = encode_fallback_subchar;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar=") == 0){
+ encode_fallback = encode_fallback_subchar;
+ unicode_subchar = 0;
+ if (p[0] != '0'){
+ /* decimal number */
+ for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
+ unicode_subchar *= 10;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }else if(p[1] == 'x' || p[1] == 'X'){
+ /* hexadecimal number */
+ for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
+ unicode_subchar <<= 4;
+ unicode_subchar |= hex2bin(p[i]);
+ }
+ }else{
+ /* octal number */
+ for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
+ unicode_subchar *= 8;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }
+ w16e_conv(unicode_subchar, &i, &j);
+ unicode_subchar = i<<8 | j;
+ continue;
+ }
#endif
#ifdef UTF8_OUTPUT_ENABLE
if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_MS;
continue;
}
#endif
@@ -1123,9 +1572,9 @@
}
#endif
if (strcmp(long_option[i].name, "prefix=") == 0){
- if (*p == '=' && ' ' < p[1] && p[1] < 128){
- for (i = 2; ' ' < p[i] && p[i] < 128; i++){
- prefix_table[p[i]] = p[1];
+ if (' ' < p[0] && p[0] < 128){
+ for (i = 1; ' ' < p[i] && p[i] < 128; i++){
+ prefix_table[p[i]] = p[0];
}
}
continue;
@@ -1333,7 +1782,7 @@
continue;
#ifndef PERL_XS
case 'O':/* for Output file */
- file_out = TRUE;
+ file_out_f = TRUE;
continue;
#endif
case 'c':/* add cr code */
@@ -1565,7 +2014,7 @@
ptr->stat = 1;
status_push_ch(ptr, c);
#ifdef SHIFTJIS_CP932
- }else if (cp932_f
+ }else if (cp51932_f
&& CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
ptr->stat = 2;
status_push_ch(ptr, c);
@@ -1602,8 +2051,8 @@
#endif /* SHIFTJIS_CP932 */
#ifndef X0212_ENABLE
status_disable(ptr);
+#endif
break;
-#endif
}
}
@@ -1947,7 +2396,15 @@
module_connection();
c2 = 0;
+ if(input_f == SJIS_INPUT
+#ifdef UTF8_INPUT_ENABLE
+ || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT || input_f == UTF16LE_INPUT
+#endif
+ ){
+ is_8bit = TRUE;
+ }
+
input_mode = ASCII;
output_mode = ASCII;
shift_mode = FALSE;
@@ -1957,7 +2414,10 @@
#define LAST break /* end of loop, go closing */
while ((c1 = (*i_getc)(f)) != EOF) {
- code_status(c1);
+#ifdef INPUT_CODE_FIX
+ if (!input_f)
+#endif
+ code_status(c1);
if (c2) {
/* second byte */
if (c2 > DEL) {
@@ -2067,7 +2527,8 @@
/* look like bogus code */
NEXT;
}
- } else if (input_mode == X0208) {
+ } else if (input_mode == X0208 || input_mode == X0212 ||
+ input_mode == X0213_1 || input_mode == X0213_2) {
/* in case of Kanji shifted */
c2 = c1;
NEXT;
@@ -2097,13 +2558,13 @@
/* normal ASCII code */
SEND;
}
- } else if (c1 == SI) {
+ } else if (!is_8bit && c1 == SI) {
shift_mode = FALSE;
NEXT;
- } else if (c1 == SO) {
+ } else if (!is_8bit && c1 == SO) {
shift_mode = TRUE;
NEXT;
- } else if (c1 == ESC ) {
+ } else if (!is_8bit && c1 == ESC ) {
if ((c1 = (*i_getc)(f)) == EOF) {
/* (*oconv)(0, ESC); don't send bogus code */
LAST;
@@ -2141,6 +2602,14 @@
shift_mode = FALSE;
NEXT;
#endif /* X0212_ENABLE */
+ } else if (c1 == (X0213_1&0x7F)){
+ input_mode = X0213_1;
+ shift_mode = FALSE;
+ NEXT;
+ } else if (c1 == (X0213_2&0x7F)){
+ input_mode = X0213_2;
+ shift_mode = FALSE;
+ NEXT;
} else {
/* could be some special code */
(*oconv)(0, ESC);
@@ -2236,21 +2705,31 @@
SEND;
}
/* send: */
- if (input_mode == X0208)
- (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
+ switch(input_mode){
+ case ASCII:
+ if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
+ int c0 = (*i_getc)(f);
+ if (c0 != EOF){
+ code_status(c0);
+ (*iconv)(c2, c1, c0);
+ }
+ }
+ break;
+ case X0208:
+ case X0213_1:
+ (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
+ break;
#ifdef X0212_ENABLE
- else if (input_mode == X0212)
- (*oconv)((0x8f << 8) | c2, c1);
+ case X0212:
+ (*oconv)((0x8f << 8) | c2, c1);
+ break;
#endif /* X0212_ENABLE */
- else if (input_mode)
- (*oconv)(input_mode, c1); /* other special case */
- else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
- int c0 = (*i_getc)(f);
- if (c0 != EOF){
- code_status(c0);
- (*iconv)(c2, c1, c0);
- }
- }
+ case X0213_2:
+ (*oconv)((0x8f << 8) | c2, c1);
+ break;
+ default:
+ (*oconv)(input_mode, c1); /* other special case */
+ }
c2 = 0;
continue;
@@ -2391,8 +2870,9 @@
#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
int val;
#endif
+ STATIC const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
- if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
+ if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
extern const unsigned short shiftjis_cp932[3][189];
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
@@ -2402,7 +2882,7 @@
}
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
- if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
+ if (!x0213_f && 0xfa <= c2 && c2 <= 0xfc){
extern const unsigned short shiftjis_x0212[3][189];
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
@@ -2419,18 +2899,27 @@
}
}
#endif
- c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
- if (c1 < 0x9f)
- c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
- else {
- c1 = c1 - 0x7e;
- c2++;
+ if(c2 >= 0x80){
+ if(x0213_f && c2 >= 0xF0){
+ if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
+ c2 = 0x8F20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
+ }else{ /* 78<=k<=94 */
+ c2 = 0x8F00 | (c2 * 2 - 0x17B);
+ if (0x9E < c1) c2++;
+ }
+ }else{
+ c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
+ if (0x9E < c1) c2++;
+ }
+ if (c1 < 0x9F)
+ c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
+ else {
+ c1 = c1 - 0x7E;
+ }
}
#ifdef X0212_ENABLE
- if (x0212_f){
- c2 = x0212_unshift(c2);
- }
+ c2 = x0212_unshift(c2);
#endif
if (p2) *p2 = c2;
if (p1) *p1 = c1;
@@ -2469,7 +2958,7 @@
c2 = (c2 << 8) | (c1 & 0x7f);
c1 = c0 & 0x7f;
#ifdef SHIFTJIS_CP932
- if (cp932_f){
+ if (cp51932_f){
int s2, s1;
if (e2s_conv(c2, c1, &s2, &s1) == 0){
s2e_conv(s2, s1, &c2, &c1);
@@ -2500,33 +2989,21 @@
int c2, c1, c0;
int *p2, *p1;
{
- extern const unsigned short *const utf8_to_euc_2bytes[];
- extern const unsigned short *const *const utf8_to_euc_3bytes[];
int ret = 0;
- if (0xc0 <= c2 && c2 <= 0xef) {
- const unsigned short *const *pp;
-
- if (0xe0 <= c2) {
- if (c0 == 0) return -1;
- pp = utf8_to_euc_3bytes[c2 - 0x80];
- ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
- } else {
- ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
- }
+ if (!c1){
+ *p2 = 0;
+ *p1 = c2;
+ }else if (0xc0 <= c2 && c2 <= 0xef) {
+ ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
#ifdef NUMCHAR_OPTION
- if (ret){
+ if (ret > 0){
if (p2) *p2 = 0;
if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
ret = 0;
}
#endif
- return ret;
- } else if (c2 == X0201) {
- c1 &= 0x7f;
}
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
return ret;
}
@@ -2537,12 +3014,20 @@
{
int ret = 0;
- if (c0 == 0){
- if (c2 == 0) /* 0x00-0x7f */
- ; /* 1byte */
- else if ((c2 & 0xe0) == 0xc0) /* 0xc0-0xdf */
- ; /* 2ytes */
- else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
+ /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
+ if(ignore_zwnbsp_f){
+ ignore_zwnbsp_f = FALSE;
+ if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
+ return 0;
+ }
+
+ if (c2 == 0) /* 0x00-0x7f */
+ c1 &= 0x7F; /* 1byte */
+ else if (c0 == 0){
+ if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
+ /* 2bytes */
+ if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
+ }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
return -1; /* 3bytes */
#ifdef __COMMENT__
else if (0xf0 <= c2)
@@ -2551,21 +3036,20 @@
return 0; /* trail byte */
#endif
else return 0;
+ }else{
+ /* must be 3bytes */
+ if(c2 == 0xE0){
+ if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
+ return 0;
+ }else if(c2 == 0xED){
+ if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
+ return 0;
+ }else if((c2 & 0xf0) == 0xe0){
+ if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
+ return 0;
+ }else return 0;
}
- if (c2 == EOF);
- else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf) {
- return 0; /* throw BOM */
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
- unsigned short val = 0;
- if(c2 == 0){
- c2 = c1;
- c1 = 0;
- }
- val = ww16_conv(c2, c1, c0);
- c2 = (val >> 8) & 0xff;
- c1 = val & 0xff;
-#endif
+ if (c2 == 0 || c2 == EOF){
} else {
ret = w2e_conv(c2, c1, c0, &c2, &c1);
}
@@ -2574,7 +3058,9 @@
}
return ret;
}
+#endif
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
void
w16w_conv(val, p2, p1, p0)
unsigned short val;
@@ -2594,13 +3080,17 @@
*p0 = 0x80 | (val & 0x3f);
}
}
+#endif
+#ifdef UTF8_INPUT_ENABLE
int
ww16_conv(c2, c1, c0)
int c2, c1, c0;
{
unsigned short val;
- if (c2 >= 0xe0){
+ if (c2 >= 0xf0){
+ val = -1;
+ }else if (c2 >= 0xe0){
val = (c2 & 0x0f) << 12;
val |= (c1 & 0x3f) << 6;
val |= (c0 & 0x3f);
@@ -2618,50 +3108,44 @@
unsigned short val;
int *p2, *p1;
{
- extern const unsigned short *const utf8_to_euc_2bytes[];
- extern const unsigned short *const *const utf8_to_euc_3bytes[];
int c2, c1, c0;
- const unsigned short *const *pp;
- int psize;
int ret = 0;
- w16w_conv(val, &c2, &c1, &c0);
- if (c1){
- if (c0){
- pp = utf8_to_euc_3bytes[c2 - 0x80];
- psize = sizeof_utf8_to_euc_C2;
- ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
- }else{
- pp = utf8_to_euc_2bytes;
- psize = sizeof_utf8_to_euc_2bytes;
- ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
- }
+ if (val < 0x80){
+ *p2 = 0;
+ *p1 = val;
+ }else{
+ w16w_conv(val, &c2, &c1, &c0);
+ ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
#ifdef NUMCHAR_OPTION
- if (ret){
- *p2 = 0;
- *p1 = CLASS_UTF16 | val;
- ret = 0;
- }
+ if (ret > 0){
+ *p2 = 0;
+ *p1 = CLASS_UTF16 | val;
+ ret = 0;
+ }
#endif
- }else{
- *p2 = 0;
- *p1 = c2;
}
return ret;
}
+#endif
+#ifdef UTF8_INPUT_ENABLE
int
w_iconv16(c2, c1, c0)
int c2, c1,c0;
{
int ret = 0;
- if (c2==0376 && c1==0377){
- utf16_mode = UTF16BE_INPUT;
- return 0;
- } else if (c2==0377 && c1==0376){
- utf16_mode = UTF16LE_INPUT;
- return 0;
+ /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
+ if(ignore_zwnbsp_f){
+ ignore_zwnbsp_f = FALSE;
+ if (c2==0376 && c1==0377){
+ utf16_mode = UTF16BE_INPUT;
+ return 0;
+ }else if(c2==0377 && c1==0376){
+ utf16_mode = UTF16LE_INPUT;
+ return 0;
+ }
}
if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
int tmp;
@@ -2670,17 +3154,132 @@
if ((c2==0 && c1 < 0x80) || c2==EOF) {
(*oconv)(c2, c1);
return 0;
- }
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16));
-#endif
- else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
+ }else if((c2>>3)==27){ /* surrogate pair */
+ return 1;
+ }else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
if (ret) return ret;
(*oconv)(c2, c1);
return 0;
}
+/*
+ * Look up the UTF-8 sequence c2 c1 c0 in the utf8_to_euc_* tables and
+ * store the converted pair through p2/p1.
+ *
+ *   c2 < 0x80          : stored as (0, c2) without any table lookup.
+ *   0x80 <= c2 < 0xe0  : handled as a two-byte sequence (c2, c1).
+ *   otherwise, c0 != 0 : handled as a three-byte sequence (c2, c1, c0).
+ *   otherwise          : returns -1 (no third byte was supplied).
+ *
+ * The table family (plain / _ms / _932) is selected by ms_ucs_map_f.
+ * When no_best_fit_chars_f is set, the characters listed below are
+ * rejected with return value 1 before any lookup is made.
+ *
+ * Returns 0 for the single-byte case, 1 or -1 as described above, and
+ * otherwise whatever w_iconv_common() returns.
+ */
int
+unicode_to_jis_common(c2, c1, c0, p2, p1)
+ int c2, c1, c0;
+ int *p2, *p1;
+{
+ extern const unsigned short *const utf8_to_euc_2bytes[];
+ extern const unsigned short *const utf8_to_euc_2bytes_ms[];
+ extern const unsigned short *const utf8_to_euc_2bytes_932[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
+ const unsigned short *const *pp;
+ const unsigned short *const *const *ppp;
+ /* The four tables below are indexed by (c1 & 0x3F); a non-zero entry
+ * means "reject this character" when no_best_fit_chars_f is set. */
+ STATIC const int no_best_fit_chars_table_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0};
+ STATIC const int no_best_fit_chars_table_C2_ascii[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
+ STATIC const int no_best_fit_chars_table_932_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
+ STATIC const int no_best_fit_chars_table_932_C3[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
+ int ret = 0;
+
+ if(c2 < 0x80){
+ /* single byte: passed through unchanged */
+ *p2 = 0;
+ *p1 = c2;
+ }else if(c2 < 0xe0){
+ /* two-byte sequence */
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ switch(c2){
+ case 0xC2:
+ if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
+ break;
+ case 0xC3:
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
+ break;
+ }
+ }else if(cp51932_f){
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2[c1&0x3F]) return 1;
+ }else{
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ascii[c1&0x3F]) return 1;
+ }
+ }
+ pp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ utf8_to_euc_2bytes;
+ ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
+ }else if(c0){
+ /* three-byte sequence */
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0x92) return 1;
+ break;
+ }
+ break;
+ case 0xE3:
+ /* NOTE(review): as written this rejects every E3 80 xx sequence and
+ * every E3 xx 9C sequence; confirm whether "&&" was intended. */
+ if(c1 == 0x80 || c0 == 0x9C) return 1;
+ break;
+ }
+ }else{
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x95) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0xA5) return 1;
+ break;
+ }
+ break;
+ case 0xEF:
+ switch(c1){
+ case 0xBC:
+ if(c0 == 0x8D) return 1;
+ break;
+ case 0xBD:
+ if(c0 == 0x9E && cp51932_f) return 1;
+ break;
+ case 0xBF:
+ if(0xA0 <= c0 && c0 <= 0xA5) return 1;
+ break;
+ }
+ break;
+ }
+ }
+ }
+ ppp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ utf8_to_euc_3bytes;
+ /* The three-byte table is indexed by the lead byte relative to 0xE0.
+ * NOTE(review): c2 has no upper bound check here; presumably callers
+ * only pass lead bytes up to 0xEF -- confirm. */
+ ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
+ }else return -1; /* lead byte >= 0xe0 but c0 is 0 */
+ return ret;
+}
+
+int
w_iconv_common(c1, c0, pp, psize, p2, p1)
int c1,c0;
const unsigned short *const *pp;
@@ -2699,9 +3298,13 @@
if (p == 0) return 1;
c0 -= 0x80;
- if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
+ if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
val = p[c0];
if (val == 0) return 1;
+ if (no_cp932ext_f && (
+ (val>>8) == 0x2D || /* NEC special characters */
+ val > 0xF300 /* NEC special characters */
+ )) return 1;
c2 = val >> 8;
if (val & 0x8000){
@@ -2715,6 +3318,118 @@
return 0;
}
+/*
+ * Emit the low 24 bits of `c` as upper-case hexadecimal digits, one
+ * call of f(0, digit) per digit, most significant digit first.
+ * Leading zero digits are not emitted, so a value of 0 emits nothing.
+ */
+void
+nkf_each_char_to_hex(f, c)
+    void (*f)PROTO((int c2,int c1));
+    int c;
+{
+    const char *digits = "0123456789ABCDEF";
+    int bit_pos;
+
+    c &= 0x00FFFFFF;
+
+    /* step past the leading nibbles that contribute nothing */
+    for(bit_pos = 20; bit_pos >= 0 && c < 1<<bit_pos; bit_pos -= 4)
+        ;
+
+    /* emit every remaining nibble, including embedded zeros */
+    for(; bit_pos >= 0; bit_pos -= 4){
+        (*f)(0, digits[(c>>bit_pos)&0xF]);
+    }
+}
+
+/*
+ * Fallback writer: send the low 24 bits of `c` through oconv as a
+ * decimal character reference of the form "&#NNNN;".  At most seven
+ * digits are written, and leading zeros are suppressed.
+ */
+void
+encode_fallback_html(c)
+    int c;
+{
+    int divisor;
+
+    (*oconv)(0, '&');
+    (*oconv)(0, '#');
+    c &= 0x00FFFFFF;
+    /* millions down to tens: a digit is written only once c reaches it */
+    for(divisor = 1000000; divisor >= 10; divisor /= 10){
+        if(c >= divisor)
+            (*oconv)(0, 0x30+(c/divisor)%10);
+    }
+    /* the units digit is always written (c is non-negative after masking) */
+    (*oconv)(0, 0x30+c%10);
+    (*oconv)(0, ';');
+}
+
+/*
+ * Fallback writer: send `c` through oconv as a hexadecimal character
+ * reference of the form "&#xHHHH;".
+ */
+void
+encode_fallback_xml(c)
+    int c;
+{
+    const char *lead = "&#x";
+
+    while(*lead){
+        (*oconv)(0, *lead++);
+    }
+    nkf_each_char_to_hex(oconv, c);
+    (*oconv)(0, ';');
+}
+
+/*
+ * Fallback writer: send `c` through oconv as a backslash escape.
+ * Values whose low 24 bits fit in 16 bits are written as \uXXXX;
+ * larger values are written as \U00XXXXXX.
+ */
+void
+encode_fallback_java(c)
+    int c;
+{
+    const char *digits = "0123456789ABCDEF";
+    int bit_pos = 12;          /* first nibble of the 4-digit form */
+
+    (*oconv)(0, '\\');
+    if((c&0x00FFFFFF) <= 0xFFFF){
+        (*oconv)(0, 'u');
+    }else{
+        (*oconv)(0, 'U');
+        (*oconv)(0, '0');
+        (*oconv)(0, '0');
+        bit_pos = 20;          /* six more digits follow the "00" */
+    }
+    for(; bit_pos >= 0; bit_pos -= 4){
+        (*oconv)(0, digits[(c>>bit_pos)&0xF]);
+    }
+}
+
+/*
+ * Fallback writer: send `c` through oconv as an escape of the form
+ * "\x{HHHH}".
+ */
+void
+encode_fallback_perl(c)
+    int c;
+{
+    const char *lead = "\\x{";
+
+    while(*lead){
+        (*oconv)(0, *lead++);
+    }
+    nkf_each_char_to_hex(oconv, c);
+    (*oconv)(0, '}');
+}
+
+/*
+ * Fallback writer: ignore the unconvertible character `c` and send the
+ * configured substitution character instead.  unicode_subchar is split
+ * into its high and low byte and handed to oconv as one (c2, c1) pair.
+ *
+ * The byte-by-byte output loop that used to follow the first return
+ * could never execute and has been removed.
+ */
+void
+encode_fallback_subchar(c)
+    int c;
+{
+    c = unicode_subchar;
+    (*oconv)((c>>8)&0xFF, c&0xFF);
+    return;
+}
#endif
#ifdef UTF8_OUTPUT_ENABLE
@@ -2732,6 +3447,9 @@
#ifdef X0212_ENABLE
} else if (c2 >> 8 == 0x8f){
extern const unsigned short *const x0212_to_utf8_2bytes[];
+ if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == 0x8F22 && c1 == 0x43){
+ return 0xA6;
+ }
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
p = x0212_to_utf8_2bytes[c2];
@@ -2742,7 +3460,7 @@
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
+ p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
else
return 0;
}
@@ -2792,9 +3510,7 @@
(*o_putc)(c1 | 0x080);
} else {
output_mode = UTF8;
- if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
- val = ((c2<<8)&0xff00) + c1;
- else val = e2w_conv(c2, c1);
+ val = e2w_conv(c2, c1);
if (val){
w16w_conv(val, &c2, &c1, &c0);
(*o_putc)(c2);
@@ -2827,8 +3543,7 @@
unicode_bom_f=1;
}
- if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
- } else if (c2 == ISO8859_1) {
+ if (c2 == ISO8859_1) {
c2 = 0;
c1 |= 0x80;
#ifdef NUMCHAR_OPTION
@@ -2861,6 +3576,7 @@
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
return;
}
}
@@ -2881,14 +3597,17 @@
} else if ((c2 & 0xff00) >> 8 == 0x8f){
output_mode = JAPANESE_EUC;
#ifdef SHIFTJIS_CP932
- if (cp932_f){
+ if (cp51932_f){
int s2, s1;
if (e2s_conv(c2, c1, &s2, &s1) == 0){
s2e_conv(s2, s1, &c2, &c1);
}
}
#endif
- if ((c2 & 0xff00) >> 8 == 0x8f){
+ if (c2 == 0) {
+ output_mode = ASCII;
+ (*o_putc)(c1);
+ }else if ((c2 & 0xff00) >> 8 == 0x8f){
if (x0212_f){
(*o_putc)(0x8f);
(*o_putc)((c2 & 0x7f) | 0x080);
@@ -2947,32 +3666,42 @@
e2s_conv(c2, c1, p2, p1)
int c2, c1, *p2, *p1;
{
-#ifdef X0212_ENABLE
- int val = 0;
- const unsigned short *ptr;
int ndx;
- extern const unsigned short *const x0212_shiftjis[];
if ((c2 & 0xff00) == 0x8f00){
- ndx = c2 & 0x7f;
- if (0x21 <= ndx && ndx <= 0x7e){
- ptr = x0212_shiftjis[ndx - 0x21];
- if (ptr){
- val = ptr[(c1 & 0x7f) - 0x21];
- }
- if (val){
- c2 = val >> 8;
- c1 = val & 0xff;
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
- }
- }
- c2 = x0212_shift(c2);
- }
+ ndx = c2 & 0xff;
+ if (x0213_f){
+ if((0x21 <= ndx && ndx <= 0x2F)){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }else if(0x6E <= ndx && ndx <= 0x7E){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }
+ return 1;
+ }
+#ifdef X0212_ENABLE
+ else if(0x21 <= ndx && ndx <= 0x7e){
+ int val = 0;
+ const unsigned short *ptr;
+ extern const unsigned short *const x0212_shiftjis[];
+ ptr = x0212_shiftjis[ndx - 0x21];
+ if (ptr){
+ val = ptr[(c1 & 0x7f) - 0x21];
+ }
+ if (val){
+ c2 = val >> 8;
+ c1 = val & 0xff;
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
+ }
+ c2 = x0212_shift(c2);
+ }
#endif /* X0212_ENABLE */
- if ((c2 & 0xff00) == 0x8f00){
- return 1;
}
+ if(0x7F < c2) return 1;
if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
return 0;
@@ -2986,6 +3715,10 @@
#ifdef NUMCHAR_OPTION
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
+ return;
+ }
}
#endif
if (c2 == EOF) {
@@ -3043,8 +3776,12 @@
c1;
{
#ifdef NUMCHAR_OPTION
- if ((c1 & CLASS_MASK) == CLASS_UTF16){
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
+ return;
+ }
}
#endif
if (c2 == EOF) {
@@ -3057,12 +3794,22 @@
(*o_putc)(EOF);
#ifdef X0212_ENABLE
} else if ((c2 & 0xff00) >> 8 == 0x8f){
- if (output_mode!=X0212) {
- output_mode = X0212;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)('(');
- (*o_putc)('D');
+ if(x0213_f){
+ if(output_mode!=X0213_2){
+ output_mode = X0213_2;
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)(X0213_2&0x7F);
+ }
+ }else{
+ if(output_mode!=X0212){
+ output_mode = X0212;
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)(X0212&0x7F);
+ }
}
(*o_putc)(c2 & 0x7f);
(*o_putc)(c1);
@@ -3090,16 +3837,21 @@
}
(*o_putc)(c1);
} else {
- if (output_mode != X0208) {
+ if(c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
+ if(x0213_f){
+ if (output_mode!=X0213_1) {
+ output_mode = X0213_1;
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)(X0213_1&0x7F);
+ }
+ }else if (output_mode != X0208) {
output_mode = X0208;
(*o_putc)(ESC);
(*o_putc)('$');
(*o_putc)(kanji_intro);
}
- if (c1<0x20 || 0x7e<c1)
- return;
- if (c2<0x20 || 0x7e<c2)
- return;
(*o_putc)(c2);
(*o_putc)(c1);
}
@@ -3300,7 +4052,7 @@
if (f_line<=fold_len) { /* normal case */
fold_state = 1;
} else {
- if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
+ if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
f_line = char_size(c2,c1);
fold_state = '\n'; /* We can't wait, do fold now */
} else if (c2==X0201) {
@@ -3568,7 +4320,7 @@
(const unsigned char *)"\075?ISO-8859-1?B?",
(const unsigned char *)"\075?ISO-2022-JP?B?",
(const unsigned char *)"\075?ISO-2022-JP?Q?",
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
(const unsigned char *)"\075?UTF-8?B?",
(const unsigned char *)"\075?UTF-8?Q?",
#endif
@@ -3580,7 +4332,7 @@
/* $B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u(B */
int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
e_iconv, s_iconv, 0, 0, 0, 0,
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
w_iconv, w_iconv,
#endif
0,
@@ -3588,7 +4340,7 @@
const int mime_encode[] = {
JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
UTF8, UTF8,
#endif
ASCII,
@@ -3597,7 +4349,7 @@
const int mime_encode_method[] = {
'B', 'B','Q', 'B', 'B', 'Q',
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
'B', 'Q',
#endif
'Q',
@@ -3607,15 +4359,6 @@
#define MAXRECOVER 20
-/* I don't trust portablity of toupper */
-#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
-#define nkf_isdigit(c) ('0'<=c && c<='9')
-#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
-#define nkf_isblank(c) (c == SPACE || c == TAB)
-#define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
-#define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
-#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
-
void
switch_mime_getc()
{
@@ -3823,14 +4566,6 @@
}
#endif /*WIN32DLL*/
-int
-hex2bin(x)
- int x;
-{
- if (nkf_isdigit(x)) return x - '0';
- return nkf_toupper(x) - 'A' + 10;
-}
-
#ifdef INPUT_OPTION
#ifdef ANSI_C_PROTOTYPE
@@ -4038,12 +4773,12 @@
if (mime_decode_mode == 'Q') {
if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
restart_mime_q:
- if (c1=='_') return ' ';
+ if (c1=='_' && mimebuf_f != FIXED_MIME) return ' ';
if (c1<=' ' || DEL<=c1) {
mime_decode_mode = exit_mode; /* prepare for quit */
return c1;
}
- if (c1!='=' && c1!='?') {
+ if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
return c1;
}
@@ -4455,13 +5190,10 @@
{
switch(mimeout_mode) {
case 'Q':
- if(c==SPACE){
- (*o_mputc)('_');
- base64_count++;
- } else if (c==CR||c==NL) {
+ if (c==CR||c==NL) {
(*o_mputc)(c);
base64_count = 0;
- } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
+ } else if(!nkf_isalnum(c)) {
(*o_mputc)('=');
(*o_mputc)(itoh4(((c>>4)&0xf)));
(*o_mputc)(itoh4((c&0xf)));
@@ -4749,13 +5481,19 @@
x0201_f = NO_X0201;
#endif
iso2022jp_f = FALSE;
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- internal_unicode_f = FALSE;
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+ ms_ucs_map_f = UCS_MAP_ASCII;
#endif
+#ifdef UTF8_INPUT_ENABLE
+ no_cp932ext_f = FALSE;
+ ignore_zwnbsp_f = TRUE;
+ no_best_fit_chars_f = FALSE;
+ encode_fallback = NULL;
+ unicode_subchar = '?';
+#endif
#ifdef UTF8_OUTPUT_ENABLE
unicode_bom_f = 0;
w_oconv16_LE = 0;
- ms_ucs_map_f = FALSE;
#endif
#ifdef UNICODE_NORMALIZATION
nfc_f = FALSE;
@@ -4776,9 +5514,13 @@
exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
- cp932_f = TRUE;
+ cp51932_f = TRUE;
cp932inv_f = TRUE;
#endif
+#ifdef X0212_ENABLE
+ x0212_f = FALSE;
+ x0213_f = FALSE;
+#endif
{
int i;
for (i = 0; i < 256; i++){
@@ -4822,7 +5564,7 @@
input_mode = ASCII;
shift_mode = FALSE;
mime_decode_mode = FALSE;
- file_out = FALSE;
+ file_out_f = FALSE;
crmode_f = 0;
option_mode = 0;
broken_counter = 0;
@@ -4865,26 +5607,27 @@
fprintf(stderr,"Flags:\n");
fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
#ifdef DEFAULT_CODE_SJIS
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_JIS
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_EUC
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_UTF8
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
#endif
#ifdef UTF8_OUTPUT_ENABLE
- fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
+ fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
#endif
- fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
+ fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
#ifdef UTF8_INPUT_ENABLE
- fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
+ fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
#endif
fprintf(stderr,"t no conversion\n");
- fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
+ fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
+ fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(stderr,"v Show this usage. V: show version\n");
@@ -4892,41 +5635,46 @@
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
- fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
- fprintf(stderr," 3: Convert HTML Entity\n");
+ fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII\n");
+ fprintf(stderr," 1: Kankaku to 1 space 2: to 2 spaces 3: Convert to HTML Entity\n");
fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
#ifdef MSDOS
fprintf(stderr,"T Text mode output\n");
#endif
fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
- fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
+ fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n");
fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
- fprintf(stderr,"long name options\n");
- fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
- fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
- fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
- fprintf(stderr," --x0212 Convert JISX0212\n");
- fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
- fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr,"Long name options\n");
+ fprintf(stderr," --ic=<input codeset> --oc=<output codeset>\n");
+ fprintf(stderr," Specify the input or output codeset\n");
+ fprintf(stderr," --fj --unix --mac --windows\n");
+ fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
+ fprintf(stderr," Convert for the system or code\n");
+ fprintf(stderr," --hiragana --katakana --katakana-hiragana\n");
+ fprintf(stderr," To Hiragana/Katakana Conversion\n");
+ fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
#ifdef NUMCHAR_OPTION
fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
#endif
-#ifdef UNICODE_NORMALIZATION
- fprintf(stderr," --utf8mac-input UTF-8-MAC input\n");
+#ifdef UTF8_INPUT_ENABLE
+ fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
+ fprintf(stderr," Specify how nkf handles unassigned characters\n");
#endif
-#ifdef UTF8_OUTPUT_ENABLE
- fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
-#endif
#ifdef OVERWRITE
- fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
+ fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
+ fprintf(stderr," Overwrite original listed files by filtered result\n");
+ fprintf(stderr," --overwrite preserves timestamp of original files\n");
#endif
- fprintf(stderr," -g, --guess Guess the input code\n");
- fprintf(stderr," --help,--version\n");
+ fprintf(stderr," -g --guess Guess the input code\n");
+ fprintf(stderr," --help --version Show this help/the version\n");
+ fprintf(stderr," For more information, see also man nkf\n");
+ fprintf(stderr,"\n");
version();
}
Modified: trunk/ext/nkf/nkf-utf8/utf8tbl.c
===================================================================
--- trunk/ext/nkf/nkf-utf8/utf8tbl.c 2006-04-13 02:33:21 UTC (rev 489)
+++ trunk/ext/nkf/nkf-utf8/utf8tbl.c 2006-04-13 08:18:48 UTC (rev 490)
@@ -4,14 +4,14 @@
const unsigned short euc_to_utf8_A1[] = {
0x3000, 0x3001, 0x3002, 0xFF0C, 0xFF0E, 0x30FB, 0xFF1A,
0xFF1B, 0xFF1F, 0xFF01, 0x309B, 0x309C, 0x00B4, 0xFF40, 0x00A8,
- 0xFF3E, 0xFFE3, 0xFF3F, 0x30FD, 0x30FE, 0x309D, 0x309E, 0x3003,
- 0x4EDD, 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F,
+ 0xFF3E, 0x203E, 0xFF3F, 0x30FD, 0x30FE, 0x309D, 0x309E, 0x3003,
+ 0x4EDD, 0x3005, 0x3006, 0x3007, 0x30FC, 0x2014, 0x2010, 0xFF0F,
0xFF3C, 0x301C, 0x2016, 0xFF5C, 0x2026, 0x2025, 0x2018, 0x2019,
0x201C, 0x201D, 0xFF08, 0xFF09, 0x3014, 0x3015, 0xFF3B, 0xFF3D,
0xFF5B, 0xFF5D, 0x3008, 0x3009, 0x300A, 0x300B, 0x300C, 0x300D,
0x300E, 0x300F, 0x3010, 0x3011, 0xFF0B, 0x2212, 0x00B1, 0x00D7,
0x00F7, 0xFF1D, 0x2260, 0xFF1C, 0xFF1E, 0x2266, 0x2267, 0x221E,
- 0x2234, 0x2642, 0x2640, 0x00B0, 0x2032, 0x2033, 0x2103, 0xFFE5,
+ 0x2234, 0x2642, 0x2640, 0x00B0, 0x2032, 0x2033, 0x2103, 0x00A5,
0xFF04, 0x00A2, 0x00A3, 0xFF05, 0xFF03, 0xFF06, 0xFF0A, 0xFF20,
0x00A7, 0x2606, 0x2605, 0x25CB, 0x25CF, 0x25CE, 0x25C7,
};
@@ -1302,7 +1302,7 @@
0, 0, 0, 0, 0, 0, 0, 0x02D8,
0x02C7, 0x00B8, 0x02D9, 0x02DD, 0x00AF, 0x02DB, 0x02DA, 0xFF5E,
0x0384, 0x0385, 0, 0, 0, 0, 0, 0,
- 0, 0, 0x00A1, 0x00A6, 0x00BF, 0, 0, 0,
+ 0, 0, 0x00A1, 0xFFE4, 0x00BF, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2249,6 +2249,34 @@
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short euc_to_utf8_8FF3[] = {
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174,
+ 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x2160, 0x2161,
+};
+const unsigned short euc_to_utf8_8FF4[] = {
+ 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168,
+ 0x2169, 0xff07, 0xff02, 0x3231, 0x2116, 0x2121, 0x70bb, 0x4efc,
+ 0x50f4, 0x51ec, 0x5307, 0x5324, 0xfa0e, 0x548a, 0x5759, 0xfa0f,
+ 0xfa10, 0x589e, 0x5bec, 0x5cf5, 0x5d53, 0xfa11, 0x5fb7, 0x6085,
+ 0x6120, 0x654e, 0x663b, 0x6665, 0xfa12, 0xf929, 0x6801, 0xfa13,
+ 0xfa14, 0x6a6b, 0x6ae2, 0x6df8, 0x6df2, 0x7028, 0xfa15, 0xfa16,
+ 0x7501, 0x7682, 0x769e, 0xfa17, 0x7930, 0xfa18, 0xfa19, 0xfa1a,
+ 0xfa1b, 0x7ae7, 0xfa1c, 0xfa1d, 0x7da0, 0x7dd6, 0xfa1e, 0x8362,
+ 0xfa1f, 0x85b0, 0xfa20, 0xfa21, 0x8807, 0xfa22, 0x8b7f, 0x8cf4,
+ 0x8d76, 0xfa23, 0xfa24, 0xfa25, 0x90de, 0xfa26, 0x9115, 0xfa27,
+ 0xfa28, 0x9592, 0xf9dc, 0xfa29, 0x973b, 0x974d, 0x9751, 0xfa2a,
+ 0xfa2b, 0xfa2c, 0x999e, 0x9ad9, 0x9b72, 0xfa2d, 0x9ed1,
+};
#endif /* X0212_ENABLE */
const unsigned short euc_to_utf8_1byte[] = {
@@ -2341,9 +2369,9 @@
euc_to_utf8_8FE4, euc_to_utf8_8FE5, euc_to_utf8_8FE6, euc_to_utf8_8FE7,
euc_to_utf8_8FE8, euc_to_utf8_8FE9, euc_to_utf8_8FEA, euc_to_utf8_8FEB,
euc_to_utf8_8FEC, euc_to_utf8_8FED, 0, 0,
+ 0, 0, 0, euc_to_utf8_8FF3,
+ euc_to_utf8_8FF4, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
0, 0, 0,};
#endif /* X0212_ENABLE */
#endif /* UTF8_OUTPUT_ENABLE */
@@ -2354,11 +2382,31 @@
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0x0E20, 0xA242, 0x2171, 0x2172, 0xA270, 0, 0xA243, 0x2178,
- 0x212F, 0x0E7D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
+ 0, 0xA242, 0x2171, 0x2172, 0xA270, 0x216F, 0xA243, 0x2178,
+ 0x212F, 0xA26D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
+const unsigned short utf8_to_euc_C2_ms[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
+ 0x212F, 0xA26D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
+ 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
+ 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
+};
+const unsigned short utf8_to_euc_C2_932[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x21, 0x2171, 0x2172, 0, 0x5C, 0x7C, 0x2178,
+ 0x212F, 0x63, 0x61, 0x2263, 0x224C, 0x2D, 0x52, 0x2131,
+ 0x216B, 0x215E, 0x32, 0x33, 0x212D, 0x264C, 0x2279, 0x2126,
+ 0x2124, 0x31, 0x6F, 0x2264, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_C3[] = {
0xAA22, 0xAA21, 0xAA24, 0xAA2A, 0xAA23, 0xAA29, 0xA921, 0xAA2E,
0xAA32, 0xAA31, 0xAA34, 0xAA33, 0xAA40, 0xAA3F, 0xAA42, 0xAA41,
@@ -2369,6 +2417,16 @@
0xA943, 0xAB50, 0xAB52, 0xAB51, 0xAB54, 0xAB58, 0xAB53, 0x2160,
0xA94C, 0xAB63, 0xAB62, 0xAB65, 0xAB64, 0xAB72, 0xA950, 0xAB73,
};
+const unsigned short utf8_to_euc_C3_932[] = {
+ 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x43,
+ 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49,
+ 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x4F, 0x215F,
+ 0x4F, 0x55, 0x55, 0x55, 0x55, 0x59, 0x54, 0x73,
+ 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x63,
+ 0x65, 0x65, 0x65, 0x65, 0x69, 0x69, 0x69, 0x69,
+ 0x64, 0x6E, 0x6F, 0x6F, 0x6F, 0x6F, 0x6F, 0x2160,
+ 0x6F, 0x75, 0x75, 0x75, 0x75, 0x79, 0x74, 0x79,
+};
const unsigned short utf8_to_euc_C4[] = {
0xAA27, 0xAB27, 0xAA25, 0xAB25, 0xAA28, 0xAB28, 0xAA2B, 0xAB2B,
0xAA2C, 0xAB2C, 0xAA2F, 0xAB2F, 0xAA2D, 0xAB2D, 0xAA30, 0xAB30,
@@ -2457,14 +2515,34 @@
0x2277, 0x2278, 0, 0, 0, 0x2145, 0x2144, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0x2273, 0, 0x216C, 0x216D, 0, 0, 0, 0,
+ 0, 0, 0, 0x2228, 0, 0, 0x2131, 0,
+};
+const unsigned short utf8_to_euc_E280_ms[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x213E, 0, 0, 0, 0x213D, 0x213D, 0x2142, 0,
+ 0x2146, 0x2147, 0, 0, 0x2148, 0x2149, 0, 0,
+ 0x2277, 0x2278, 0, 0, 0, 0x2145, 0x2144, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2273, 0, 0x216C, 0x216D, 0, 0, 0, 0,
+ 0, 0, 0, 0x2228, 0, 0, 0x7E, 0,
+};
+const unsigned short utf8_to_euc_E280_932[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x213E, 0, 0, 0, 0, 0x213D, 0, 0,
+ 0x2146, 0x2147, 0, 0, 0x2148, 0x2149, 0, 0,
+ 0x2277, 0x2278, 0, 0, 0, 0x2145, 0x2144, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2273, 0, 0x216C, 0x216D, 0, 0, 0, 0,
0, 0, 0, 0x2228, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E284[] = {
0, 0, 0, 0x216E, 0, 0, 0, 0,
- 0, 0x2B37, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0x2B31, 0, 0, 0x2D62, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0x2D64, 0x0E7E, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2D62, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2D64, 0xA26F, 0, 0, 0, 0, 0,
0, 0, 0, 0x2272, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2475,9 +2553,9 @@
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0x2D35, 0x2D36, 0x2D37, 0x2D38, 0x2D39, 0x2D3A, 0x2D3B, 0x2D3C,
- 0x2D3D, 0x2D3E, 0x2A2B, 0x2A2C, 0, 0, 0, 0,
- 0x7C71, 0x7C72, 0x7C73, 0x7C74, 0x7C75, 0x7C76, 0x7C77, 0x7C78,
- 0x7C79, 0x7C7A, 0x2A3F, 0x2A40, 0, 0, 0, 0,
+ 0x2D3D, 0x2D3E, 0, 0, 0, 0, 0, 0,
+ 0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A,
+ 0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E286[] = {
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2490,14 +2568,14 @@
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E287[] = {
- 0, 0, 0, 0, 0x2C4E, 0x2C4F, 0x2C4D, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x224D, 0, 0x224E, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0x2C52, 0x2C53,
- 0x2C51, 0x2C54, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E288[] = {
0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260,
@@ -2509,6 +2587,16 @@
0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
0, 0, 0, 0, 0, 0x2266, 0, 0,
};
+const unsigned short utf8_to_euc_E288_932[] = {
+ 0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260,
+ 0x223A, 0, 0, 0x223B, 0, 0, 0, 0,
+ 0, 0x2D74, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2D78,
+ 0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A,
+ 0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2D73, 0,
+ 0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
+ 0, 0, 0, 0, 0, 0x2266, 0, 0,
+};
const unsigned short utf8_to_euc_E289[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2546,17 +2634,7 @@
0, 0, 0, 0, 0, 0, 0, 0,
0x2D21, 0x2D22, 0x2D23, 0x2D24, 0x2D25, 0x2D26, 0x2D27, 0x2D28,
0x2D29, 0x2D2A, 0x2D2B, 0x2D2C, 0x2D2D, 0x2D2E, 0x2D2F, 0x2D30,
- 0x2D31, 0x2D32, 0x2D33, 0x2D34, 0x293F, 0x2940, 0x2941, 0x2942,
- 0x2943, 0x2944, 0x2945, 0x2946, 0x2947, 0x2948, 0x2949, 0x294A,
-};
-const unsigned short utf8_to_euc_E292[] = {
- 0x294B, 0x294C, 0x294D, 0x294E, 0x294F, 0x2950, 0x2951, 0x2952,
- 0x2972, 0x2973, 0x2974, 0x2975, 0x2976, 0x2977, 0x2978, 0x2979,
- 0x297A, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0x2A5D, 0x2A5E, 0x2A5F, 0x2A60,
- 0x2A61, 0x2A62, 0x2A63, 0x2A64, 0x2A65, 0x2A66, 0x2A67, 0x2A68,
- 0x2A69, 0x2A6A, 0x2A6B, 0x2A6C, 0x2A6D, 0x2A6E, 0x2A6F, 0x2A70,
- 0x2A71, 0x2A72, 0x2A73, 0x2A74, 0x2A75, 0x2A76, 0, 0,
+ 0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E294[] = {
@@ -2601,43 +2679,43 @@
};
const unsigned short utf8_to_euc_E298[] = {
0, 0, 0, 0, 0, 0x217A, 0x2179, 0,
- 0, 0, 0, 0, 0, 0, 0x2C36, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0x2C4A, 0x2C4B, 0x2C49, 0x2C4C,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E299[] = {
0x216A, 0, 0x2169, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0x2C25, 0x2C23, 0x2C24, 0x2C26, 0x2C21, 0x2C27, 0x2C28, 0x2C22,
+ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x2276, 0, 0, 0x2275, 0, 0x2274,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
-const unsigned short utf8_to_euc_E29D[] = {
+const unsigned short utf8_to_euc_E380[] = {
+ 0x2121, 0x2122, 0x2123, 0x2137, 0, 0x2139, 0x213A, 0x213B,
+ 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159,
+ 0x215A, 0x215B, 0x2229, 0x222E, 0x214C, 0x214D, 0, 0,
+ 0, 0, 0, 0, 0x2141, 0x2D60, 0, 0x2D61,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0x295D, 0x295E,
- 0x295F, 0x2960, 0x2961, 0x2962, 0x2963, 0x2964, 0x2965, 0,
};
-const unsigned short utf8_to_euc_E380[] = {
- 0x2121, 0x2122, 0x2123, 0x2137, 0x2C37, 0x2139, 0x213A, 0x213B,
+const unsigned short utf8_to_euc_E380_932[] = {
+ 0x2121, 0x2122, 0x2123, 0x2137, 0, 0x2139, 0x213A, 0x213B,
0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159,
0x215A, 0x215B, 0x2229, 0x222E, 0x214C, 0x214D, 0, 0,
- 0, 0, 0, 0, 0x2141, 0x2D60, 0, 0x2D61,
- 0x2C35, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2D60, 0, 0x2D61,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E381[] = {
0, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427,
@@ -2652,13 +2730,23 @@
const unsigned short utf8_to_euc_E382[] = {
0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467,
0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
- 0x2470, 0x2471, 0x2472, 0x2473, 0x2F49, 0, 0, 0,
- 0, 0x0E5E, 0x0E5F, 0x212B, 0x212C, 0x2135, 0x2136, 0,
+ 0x2470, 0x2471, 0x2472, 0x2473, 0, 0, 0, 0,
+ 0, 0, 0, 0x212B, 0x212C, 0x2135, 0x2136, 0,
0, 0x2521, 0x2522, 0x2523, 0x2524, 0x2525, 0x2526, 0x2527,
0x2528, 0x2529, 0x252A, 0x252B, 0x252C, 0x252D, 0x252E, 0x252F,
0x2530, 0x2531, 0x2532, 0x2533, 0x2534, 0x2535, 0x2536, 0x2537,
0x2538, 0x2539, 0x253A, 0x253B, 0x253C, 0x253D, 0x253E, 0x253F,
};
+const unsigned short utf8_to_euc_E382_932[] = {
+ 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467,
+ 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
+ 0x2470, 0x2471, 0x2472, 0x2473, 0x2574, 0, 0, 0,
+ 0, 0, 0, 0x212B, 0x212C, 0x2135, 0x2136, 0,
+ 0, 0x2521, 0x2522, 0x2523, 0x2524, 0x2525, 0x2526, 0x2527,
+ 0x2528, 0x2529, 0x252A, 0x252B, 0x252C, 0x252D, 0x252E, 0x252F,
+ 0x2530, 0x2531, 0x2532, 0x2533, 0x2534, 0x2535, 0x2536, 0x2537,
+ 0x2538, 0x2539, 0x253A, 0x253B, 0x253C, 0x253D, 0x253E, 0x253F,
+};
const unsigned short utf8_to_euc_E383[] = {
0x2540, 0x2541, 0x2542, 0x2543, 0x2544, 0x2545, 0x2546, 0x2547,
0x2548, 0x2549, 0x254A, 0x254B, 0x254C, 0x254D, 0x254E, 0x254F,
@@ -2666,8 +2754,8 @@
0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, 0x255F,
0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x256D, 0x256E, 0x256F,
- 0x2570, 0x2571, 0x2572, 0x2573, 0x2574, 0x2575, 0x2576, 0x2F4B,
- 0x2F4C, 0x2F4D, 0x2F4E, 0x2126, 0x213C, 0x2133, 0x2134, 0,
+ 0x2570, 0x2571, 0x2572, 0x2573, 0x2574, 0x2575, 0x2576, 0,
+ 0, 0, 0, 0x2126, 0x213C, 0x2133, 0x2134, 0,
};
const unsigned short utf8_to_euc_E388[] = {
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2675,69 +2763,59 @@
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0x2D22, 0x2D23, 0x2D24, 0x2D25, 0x2D26, 0x2D27,
- 0x2D21, 0x2D6A, 0x2D6B, 0x2D34, 0x2D30, 0x2D35, 0x2D33, 0x2D29,
- 0x2D39, 0x2D6C, 0x2D2D, 0x2D32, 0x2D36, 0x2D37, 0x2D2F, 0x2D38,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0,
+ 0, 0x2D6C, 0, 0, 0, 0, 0, 0,
};
-const unsigned short utf8_to_euc_E389[] = {
- 0x2D28, 0, 0x2D2A, 0x2D2B, 0, 0, 0, 0,
+const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-};
-const unsigned short utf8_to_euc_E38A[] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0x2D79, 0,
- 0x2D7B, 0x2D7E, 0, 0, 0, 0x2D7A, 0x2D7C, 0,
0, 0, 0, 0, 0x2D65, 0x2D66, 0x2D67, 0x2D68,
- 0x2D69, 0x2D78, 0, 0, 0, 0, 0, 0,
+ 0x2D69, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38C[] = {
- 0x2E3F, 0, 0, 0x2D46, 0, 0x2E26, 0, 0,
+ 0, 0, 0, 0x2D46, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D4A, 0, 0,
- 0, 0, 0, 0, 0x2D41, 0x2E2C, 0x2E25, 0,
- 0x2D44, 0, 0, 0, 0, 0, 0x2E40, 0,
+ 0, 0, 0, 0, 0x2D41, 0, 0, 0,
+ 0x2D44, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x2D42, 0x2D4C, 0, 0, 0x2D4B, 0x2D45,
- 0, 0, 0x2E41, 0x2D4D, 0, 0, 0, 0,
- 0, 0x2E42, 0, 0x2E27, 0, 0, 0x2D47, 0,
- 0, 0x2E30, 0, 0x2D4F, 0, 0, 0, 0,
+ 0, 0, 0, 0x2D4D, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2D47, 0,
+ 0, 0, 0, 0x2D4F, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38D[] = {
- 0, 0, 0x2E33, 0, 0, 0, 0, 0x2E43,
- 0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0x2E28, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0,
0, 0x2D48, 0, 0, 0, 0, 0, 0x2D49,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0x2E7C,
+ 0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0,
};
const unsigned short utf8_to_euc_E38E[] = {
- 0, 0, 0, 0, 0, 0x2B3B, 0x2B3C, 0x2B3D,
+ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54,
- 0x2B3A, 0, 0, 0, 0, 0, 0x2B2F, 0x2B30,
- 0x2B32, 0, 0, 0, 0x2D50, 0x2D51, 0x2D52, 0x2B22,
- 0x2B24, 0x2D56, 0x2B2A, 0, 0x2B25, 0x2B28, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0x2B36, 0x2B35, 0x2B34, 0x2B33, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0x2D50, 0x2D51, 0x2D52, 0,
+ 0, 0x2D56, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0x2D55, 0, 0, 0,
- 0, 0, 0, 0x2B39, 0, 0x2D63, 0, 0,
- 0, 0, 0, 0, 0x2B38, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2D63, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E4B8[] = {
0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C,
@@ -2745,7 +2823,7 @@
0x5022, 0x312F, 0xB025, 0, 0x336E, 0x5023, 0x4024, 0x5242,
0x3556, 0x4A3A, 0, 0, 0, 0, 0x3E67, 0xB026,
0, 0x4E3E, 0, 0xB027, 0xB028, 0, 0x4A42, 0,
- 0x792D, 0, 0x5024, 0xB02A, 0, 0x4366, 0xB02B, 0xB02C,
+ 0xB029, 0, 0x5024, 0xB02A, 0, 0x4366, 0xB02B, 0xB02C,
0xB02D, 0x5025, 0x367A, 0, 0, 0xB02E, 0x5026, 0,
0x345D, 0x4330, 0, 0x3C67, 0x5027, 0, 0, 0x5028,
};
@@ -2774,25 +2852,25 @@
0, 0, 0x3A23, 0x3270, 0, 0x503B, 0x503A, 0x4A29,
0xB044, 0, 0, 0, 0x3B46, 0x3B45, 0x423E, 0x503F,
0x4955, 0x4067, 0xB045, 0xB046, 0, 0x2138, 0x5040, 0x5042,
- 0xB047, 0x792E, 0xB049, 0x4265, 0x4E61, 0x304A, 0, 0,
+ 0xB047, 0xB048, 0xB049, 0x4265, 0x4E61, 0x304A, 0, 0,
0xB04A, 0, 0, 0, 0, 0x5041, 0x323E, 0xB04B,
0x3644, 0xB04C, 0x4367, 0xB04D, 0, 0xB04E, 0x376F, 0x5043,
- 0, 0, 0, 0x4724, 0x792F, 0xB04F, 0xB050, 0xB051,
+ 0, 0, 0, 0x4724, 0xF42F, 0xB04F, 0xB050, 0xB051,
};
const unsigned short utf8_to_euc_E4BC[] = {
- 0x7930, 0x346B, 0xB053, 0x7931, 0, 0, 0, 0,
+ 0xB052, 0x346B, 0xB053, 0xB054, 0, 0, 0, 0,
0xB055, 0x5044, 0x304B, 0xB056, 0xB057, 0x3860, 0x346C, 0x497A,
0x4832, 0x3559, 0xB058, 0, 0, 0xB059, 0xB05A, 0xB05B,
0, 0xB05C, 0x3271, 0, 0x5067, 0x4541, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0xB05D, 0x476C,
0x5046, 0xB05E, 0, 0xB060, 0x483C, 0xB061, 0x4E62, 0xB062,
- 0x3F2D, 0x7932, 0x3B47, 0xB064, 0x3B77, 0x3240, 0xB065, 0,
+ 0x3F2D, 0xB063, 0x3B47, 0xB064, 0x3B77, 0x3240, 0xB065, 0,
};
const unsigned short utf8_to_euc_E4BD[] = {
0xB066, 0, 0xB067, 0x4451, 0, 0, 0x4322, 0x504A,
0xB068, 0xB069, 0, 0xB06A, 0xB06B, 0x304C, 0x4463, 0x3D3B,
- 0x3A34, 0x4D24, 0xB06C, 0x424E, 0xB06D, 0x323F, 0x7933, 0x5049,
+ 0x3A34, 0x4D24, 0xB06C, 0x424E, 0xB06D, 0x323F, 0xB06E, 0x5049,
0xB06F, 0x4D3E, 0x5045, 0x5047, 0x3A6E, 0x5048, 0x5524, 0xB070,
0xB05F, 0, 0, 0xB071, 0, 0, 0, 0,
0, 0x5050, 0xB072, 0, 0xB073, 0, 0xB074, 0x5053,
@@ -2801,9 +2879,9 @@
};
const unsigned short utf8_to_euc_E4BE[] = {
0, 0xB07C, 0xB07D, 0x3426, 0xB07E, 0xB121, 0x5054, 0,
- 0x504C, 0xB122, 0x7935, 0x4E63, 0xB124, 0x3B78, 0xB125, 0x504D,
- 0xB126, 0x5052, 0x7934, 0xB128, 0x7937, 0, 0x5055, 0xB12A,
- 0x504E, 0xB12B, 0x7936, 0x3621, 0, 0x304D, 0xB12D, 0xB12E,
+ 0x504C, 0xB122, 0xB123, 0x4E63, 0xB124, 0x3B78, 0xB125, 0x504D,
+ 0xB126, 0x5052, 0xB127, 0xB128, 0xB129, 0, 0x5055, 0xB12A,
+ 0x504E, 0xB12B, 0xB12C, 0x3621, 0, 0x304D, 0xB12D, 0xB12E,
0x3622, 0x3241, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x5525, 0, 0x4B79, 0x496E, 0x3874,
0, 0, 0xB12F, 0, 0, 0x3F2F, 0x4E37, 0xB130,
@@ -2811,38 +2889,38 @@
};
const unsigned short utf8_to_euc_E4BF[] = {
0xB136, 0xB137, 0x3738, 0x4225, 0x3264, 0xB138, 0xB139, 0,
- 0xB13A, 0x7926, 0x3D53, 0xB13C, 0xB13D, 0x7938, 0x5059, 0xB13F,
+ 0xB13A, 0xB13B, 0x3D53, 0xB13C, 0xB13D, 0xB13E, 0x5059, 0xB13F,
0x505E, 0x505C, 0xB140, 0, 0x5057, 0, 0, 0x422F,
0x505A, 0, 0x505D, 0x505B, 0xB141, 0x4A5D, 0, 0x5058,
0xB142, 0x3F2E, 0xB143, 0x4B73, 0x505F, 0x5060, 0, 0,
0, 0, 0, 0, 0, 0, 0x3D24, 0x506D,
0xB144, 0, 0xB145, 0x4750, 0, 0x4936, 0x5068, 0,
- 0x4A70, 0, 0x3236, 0, 0xB146, 0xB147, 0x506C, 0x793B,
+ 0x4A70, 0, 0x3236, 0, 0xB146, 0xB147, 0x506C, 0xB148,
};
const unsigned short utf8_to_euc_E580[] = {
0xB149, 0xB14A, 0, 0, 0xB14B, 0x5066, 0x506F, 0xB14C,
0, 0x4152, 0xB14D, 0x3844, 0xB14E, 0x475C, 0xB14F, 0x6047,
0xB150, 0x506E, 0x455D, 0xB151, 0x5063, 0, 0x3876, 0xB152,
- 0xB153, 0x3875, 0x5061, 0xB154, 0xB155, 0xB156, 0x793C, 0x3C5A,
- 0, 0x5069, 0x793A, 0x4A6F, 0x434D, 0x5065, 0x3771, 0xB159,
+ 0xB153, 0x3875, 0x5061, 0xB154, 0xB155, 0xB156, 0xB157, 0x3C5A,
+ 0, 0x5069, 0xB158, 0x4A6F, 0x434D, 0x5065, 0x3771, 0xB159,
0x5062, 0x506A, 0x5064, 0x4E51, 0x506B, 0x4F41, 0xB15A, 0,
0xB15B, 0, 0xB15C, 0xB15D, 0, 0xB15E, 0x3666, 0,
0, 0x3770, 0, 0xB176, 0, 0, 0, 0,
};
const unsigned short utf8_to_euc_E581[] = {
- 0x7939, 0xB160, 0x793F, 0x5070, 0, 0xB162, 0x793D, 0x5071,
+ 0xB15F, 0xB160, 0xB161, 0x5070, 0, 0xB162, 0xB163, 0x5071,
0x5075, 0x304E, 0xB164, 0, 0xB165, 0, 0xB166, 0x4A50,
0x5074, 0xB167, 0xB168, 0xB169, 0, 0x5073, 0x5077, 0xB16A,
0, 0xB16B, 0x5076, 0, 0x4464, 0, 0, 0xB16C,
0xB16D, 0, 0xB16E, 0xB16F, 0, 0x3772, 0xB170, 0xB171,
0, 0, 0xB172, 0, 0x5078, 0xB173, 0, 0,
- 0x793E, 0xB175, 0x3C45, 0, 0x4226, 0x4465, 0x3676, 0,
+ 0xB174, 0xB175, 0x3C45, 0, 0x4226, 0x4465, 0x3676, 0,
0x5079, 0, 0, 0, 0, 0x3536, 0, 0,
};
const unsigned short utf8_to_euc_E582[] = {
0x507A, 0xB177, 0, 0xB178, 0xB179, 0x507C, 0xB17A, 0,
0, 0, 0xB17B, 0, 0, 0x4B35, 0xB17C, 0xB17D,
- 0xB17E, 0x3766, 0xB221, 0xB222, 0x7940, 0, 0xB224, 0,
+ 0xB17E, 0x3766, 0xB221, 0xB222, 0xB223, 0, 0xB224, 0,
0x3B31, 0x4877, 0x507B, 0xB225, 0xB226, 0, 0xB227, 0xB228,
0xB229, 0xB22A, 0xB22B, 0, 0, 0, 0, 0,
0, 0, 0xB22C, 0, 0x3A45, 0x4D43, 0, 0xB22D,
@@ -2853,10 +2931,10 @@
0xB232, 0, 0x5124, 0xB233, 0xB234, 0x364F, 0, 0xB235,
0, 0x5121, 0x5122, 0, 0xB236, 0x462F, 0xB237, 0x417C,
0xB238, 0x3623, 0, 0xB239, 0xB23A, 0x4B4D, 0x5125, 0,
- 0x7942, 0, 0x4E3D, 0, 0xB23C, 0xB23D, 0x5126, 0xB23E,
+ 0xB23B, 0, 0x4E3D, 0, 0xB23C, 0xB23D, 0x5126, 0xB23E,
0, 0, 0xB23F, 0x5129, 0xB240, 0x5127, 0xB241, 0x414E,
0xB242, 0xB243, 0, 0, 0, 0x5128, 0x512A, 0xB244,
- 0, 0xB245, 0xB251, 0, 0x7941, 0x512C, 0xB246, 0,
+ 0, 0xB245, 0xB251, 0, 0xF430, 0x512C, 0xB246, 0,
0, 0x512B, 0xB247, 0x4A48, 0, 0, 0xB248, 0,
};
const unsigned short utf8_to_euc_E584[] = {
@@ -2871,10 +2949,10 @@
};
const unsigned short utf8_to_euc_E585[] = {
0x513A, 0x3074, 0xB265, 0x3835, 0x373B, 0x3D3C, 0x437B, 0x3624,
- 0x4068, 0x3877, 0x7943, 0x396E, 0x513C, 0x4C48, 0x4546, 0xB267,
+ 0x4068, 0x3877, 0xB266, 0x396E, 0x513C, 0x4C48, 0x4546, 0xB267,
0x3B79, 0, 0x513B, 0xB268, 0x513D, 0xB269, 0, 0xB26A,
0xB26B, 0, 0x455E, 0, 0x3375, 0, 0, 0xB26C,
- 0, 0, 0x513E, 0, 0x7944, 0x467E, 0xB26E, 0,
+ 0, 0, 0x513E, 0, 0xB26D, 0x467E, 0xB26E, 0,
0x4134, 0x5140, 0x5141, 0x482C, 0x3878, 0x4F3B, 0x5142, 0,
0, 0x3626, 0, 0, 0, 0x4A3C, 0x4236, 0x3671,
0x4535, 0, 0, 0, 0x3773, 0, 0xB26F, 0,
@@ -2883,11 +2961,11 @@
0x5143, 0, 0x5144, 0xB270, 0xB271, 0x4662, 0x315F, 0,
0, 0x5147, 0x3A7D, 0xB272, 0x5146, 0x3A46, 0xB273, 0x5148,
0x666E, 0x5149, 0x4B41, 0x514A, 0, 0x514B, 0x514C, 0x3E69,
- 0xB274, 0x3C4C, 0, 0, 0, 0x7945, 0, 0,
+ 0xB274, 0x3C4C, 0, 0, 0, 0xB275, 0, 0,
0x3427, 0xB276, 0x514F, 0xB277, 0x514D, 0x4C3D, 0x514E, 0,
0x495A, 0x5150, 0x5151, 0x5152, 0x455F, 0xB278, 0, 0,
0x5156, 0x5154, 0x5155, 0x5153, 0x3A63, 0x5157, 0x4C6A, 0x4E64,
- 0xB279, 0, 0xB27A, 0, 0xB27B, 0x5158, 0x7946, 0xB27D,
+ 0xB279, 0, 0xB27A, 0, 0xB27B, 0x5158, 0xB27C, 0xB27D,
};
const unsigned short utf8_to_euc_E587[] = {
0, 0, 0xB27E, 0, 0x4028, 0x5159, 0x3D5A, 0,
@@ -2895,14 +2973,14 @@
0, 0xB323, 0xB324, 0xB325, 0, 0xB326, 0x5245, 0,
0xB327, 0, 0, 0x515B, 0x7425, 0x3645, 0xB328, 0,
0x515C, 0x4B5E, 0xB329, 0, 0, 0xB32A, 0x3D68, 0x427C,
- 0, 0x515E, 0x4664, 0, 0x7947, 0x515F, 0xB32B, 0,
+ 0, 0x515E, 0x4664, 0, 0xF431, 0x515F, 0xB32B, 0,
0x5160, 0x332E, 0xB32C, 0xB32D, 0xB32E, 0x5161, 0x3627, 0xB32F,
0x464C, 0x317A, 0x3D50, 0, 0, 0x4821, 0x5162, 0,
};
const unsigned short utf8_to_euc_E588[] = {
0x4561, 0xB330, 0xB331, 0x3F4F, 0x5163, 0xB332, 0x4A2C, 0x405A,
0x3422, 0, 0x3429, 0x5164, 0, 0, 0x5166, 0,
- 0, 0x373A, 0xB333, 0xB334, 0x5165, 0x7948, 0xB336, 0x4E73,
+ 0, 0x373A, 0xB333, 0xB334, 0x5165, 0xB335, 0xB336, 0x4E73,
0xB337, 0, 0, 0, 0, 0x3D69, 0, 0,
0, 0, 0xB338, 0, 0x483D, 0x4A4C, 0, 0x5167,
0xB339, 0x4D78, 0x5168, 0, 0, 0, 0x5169, 0,
@@ -2923,28 +3001,28 @@
0xB34D, 0, 0xB34E, 0x3344, 0, 0xB34F, 0, 0x3760,
0x517C, 0x4E2D, 0xB350, 0, 0xB351, 0x5178, 0, 0,
0, 0x517D, 0x517A, 0xB352, 0x5179, 0xB353, 0xB354, 0xB355,
- 0xB356, 0, 0xB357, 0x4E4F, 0x7949, 0, 0, 0x3879,
- 0x3243, 0, 0, 0x4E74, 0xB359, 0xB35A, 0x794A, 0xB35C,
- 0, 0x3D75, 0x4558, 0x3965, 0x5222, 0x5223, 0, 0x7B3C,
+ 0xB356, 0, 0xB357, 0x4E4F, 0xB358, 0, 0, 0x3879,
+ 0x3243, 0, 0, 0x4E74, 0xB359, 0xB35A, 0xB35B, 0xB35C,
+ 0, 0x3D75, 0x4558, 0x3965, 0x5222, 0x5223, 0, 0xB35D,
0xB35E, 0x4E65, 0, 0, 0x4F2B, 0x5225, 0xB35F, 0xB360,
0xB361, 0x387A, 0xB362, 0xB363, 0x5224, 0xB364, 0x332F, 0,
};
const unsigned short utf8_to_euc_E58B[] = {
- 0x794B, 0x5226, 0, 0x4B56, 0xB366, 0x443C, 0xB367, 0x4D26,
+ 0xB365, 0x5226, 0, 0x4B56, 0xB366, 0x443C, 0xB367, 0x4D26,
0xB368, 0x4A59, 0, 0, 0xB369, 0x5227, 0, 0xB36A,
0, 0xB36B, 0x7055, 0, 0xB36C, 0x4630, 0xB36D, 0x5228,
- 0x342A, 0x4C33, 0, 0x794C, 0xB36F, 0x3E21, 0x5229, 0x4A67,
+ 0x342A, 0x4C33, 0, 0xB36E, 0xB36F, 0x3E21, 0x5229, 0x4A67,
0x522D, 0xB370, 0x402A, 0x522A, 0x3650, 0xB371, 0x522B, 0x342B,
0xB372, 0xB373, 0xB374, 0, 0xB375, 0, 0, 0,
0xB376, 0xB377, 0x372E, 0x522E, 0xB378, 0x522F, 0xB379, 0xB37A,
0x5230, 0x5231, 0x3C5B, 0, 0, 0, 0x387B, 0x4C5E,
};
const unsigned short utf8_to_euc_E58C[] = {
- 0x794D, 0x4C68, 0x4677, 0xB37C, 0, 0x4A71, 0x5232, 0x794E,
+ 0xB37B, 0x4C68, 0x4677, 0xB37C, 0, 0x4A71, 0x5232, 0xF432,
0x5233, 0, 0xB37D, 0xB37E, 0xB421, 0x5235, 0, 0x5237,
0x5236, 0xB422, 0, 0xB423, 0, 0x5238, 0x323D, 0x4B4C,
0xB424, 0x3A7C, 0x5239, 0xB425, 0xB426, 0x4159, 0xB427, 0xB428,
- 0x3E22, 0x3629, 0, 0x523A, 0x794F, 0xB429, 0, 0xB42A,
+ 0x3E22, 0x3629, 0, 0x523A, 0xF433, 0xB429, 0, 0xB42A,
0xB42B, 0xB42C, 0x485B, 0xB42D, 0xB42E, 0xB42F, 0, 0x523B,
0xB430, 0x523C, 0xB431, 0x523D, 0, 0xB432, 0, 0,
0x523E, 0x4924, 0x3668, 0x3065, 0xB433, 0xB434, 0xB435, 0x463F,
@@ -2956,24 +3034,24 @@
0x4331, 0xB439, 0x476E, 0xB43A, 0x4B4E, 0, 0x5246, 0,
0x406A, 0xB43B, 0, 0xB43C, 0, 0xB43D, 0x3735, 0,
0, 0x5247, 0, 0, 0xB43E, 0xB43F, 0x5248, 0x312C,
- 0x3075, 0x346D, 0x7950, 0x4228, 0x3551, 0x4D71, 0, 0x524B,
+ 0x3075, 0x346D, 0xB440, 0x4228, 0x3551, 0x4D71, 0, 0x524B,
0x3237, 0xB441, 0, 0x524A, 0, 0, 0xB442, 0x362A,
};
const unsigned short utf8_to_euc_E58E[] = {
0, 0, 0x524C, 0xB443, 0x4C71, 0, 0, 0xB444,
0xB445, 0, 0, 0, 0, 0, 0xB446, 0,
- 0, 0, 0, 0x7951, 0xB448, 0, 0x524D, 0,
+ 0, 0, 0, 0xB447, 0xB448, 0, 0x524D, 0,
0x4E52, 0xB449, 0x387C, 0, 0, 0xB44A, 0, 0x3836,
0x524E, 0xB44B, 0, 0, 0xB44C, 0x5250, 0x524F, 0,
0x3F5F, 0x3139, 0xB44D, 0xB44E, 0, 0x315E, 0x5251, 0xB44F,
- 0x5252, 0, 0x7952, 0x3837, 0xB451, 0xB452, 0x5253, 0xB453,
+ 0x5252, 0, 0xB450, 0x3837, 0xB451, 0xB452, 0x5253, 0xB453,
0xB454, 0, 0xB455, 0x356E, 0, 0xB456, 0, 0,
};
const unsigned short utf8_to_euc_E58F[] = {
0xB457, 0, 0x3B32, 0x5254, 0, 0xB458, 0, 0,
0x4B74, 0x3A35, 0x355A, 0x4D27, 0x4150, 0x483F, 0x3C7D, 0xB459,
0, 0, 0xB45A, 0xB45B, 0x3D47, 0xB45C, 0x3C68, 0x3C75,
- 0, 0x3D76, 0xB45D, 0x4840, 0, 0x7953, 0xB45F, 0x5257,
+ 0, 0x3D76, 0xB45D, 0x4840, 0, 0xB45E, 0xB45F, 0x5257,
0xB460, 0x3143, 0x4151, 0x387D, 0x3845, 0x3667, 0xB461, 0xB462,
0x525B, 0x4321, 0x427E, 0x362B, 0x3E24, 0x525C, 0x525A, 0x3244,
0x4266, 0x3C38, 0x3B4B, 0x3126, 0, 0xB463, 0x3370, 0x3966,
@@ -3001,11 +3079,11 @@
};
const unsigned short utf8_to_euc_E592[] = {
0x5272, 0xB521, 0, 0xB522, 0x5274, 0xB523, 0x5276, 0,
- 0xB524, 0xB525, 0x7956, 0x3A70, 0x4F42, 0xB526, 0x526B, 0x5269,
+ 0xB524, 0xB525, 0xF435, 0x3A70, 0x4F42, 0xB526, 0x526B, 0x5269,
0x5275, 0xB527, 0x5270, 0, 0, 0xB528, 0xB529, 0,
- 0, 0, 0, 0, 0x7955, 0, 0, 0xB52B,
+ 0, 0, 0, 0, 0xB52A, 0, 0, 0xB52B,
0, 0xB52C, 0x5278, 0, 0x5323, 0x527A, 0xB52D, 0xB52E,
- 0x527E, 0x7957, 0xB530, 0x5321, 0x527B, 0xB531, 0xB532, 0x533E,
+ 0x527E, 0xB52F, 0xB530, 0x5321, 0x527B, 0xB531, 0xB532, 0x533E,
0, 0xB533, 0x3A69, 0x3331, 0, 0, 0, 0xB534,
0x5279, 0xB535, 0xB536, 0xB537, 0x5325, 0x3076, 0x5324, 0xB538,
};
@@ -3017,7 +3095,7 @@
0xB53D, 0x3077, 0x532F, 0, 0, 0x5327, 0x5328, 0,
0x3E25, 0x4B69, 0xB53E, 0, 0xB53F, 0x532D, 0x532C, 0xB540,
0, 0, 0x452F, 0, 0, 0, 0xB541, 0,
- 0, 0, 0x532E, 0, 0xB542, 0x532B, 0xB543, 0x7958,
+ 0, 0, 0x532E, 0, 0xB542, 0x532B, 0xB543, 0xB544,
};
const unsigned short utf8_to_euc_E594[] = {
0xB545, 0xB546, 0, 0, 0x3134, 0xB547, 0x3A36, 0x3F30,
@@ -3040,7 +3118,7 @@
0, 0, 0, 0x5341, 0x5346, 0, 0x5342, 0xB565,
};
const unsigned short utf8_to_euc_E596[] = {
- 0x533D, 0xB566, 0xB567, 0x5347, 0x4131, 0, 0x7959, 0x5349,
+ 0x533D, 0xB566, 0xB567, 0x5347, 0x4131, 0, 0xB568, 0x5349,
0xB569, 0x3922, 0x533F, 0x437D, 0, 0, 0xB56A, 0xB56B,
0, 0xB56C, 0xB56D, 0xB56E,