[前][次][番号順一覧][スレッド一覧][生データ]

yarv-diff:216

From: ko1 atdot.net
Date: 9 Feb 2006 16:14:55 -0000
Subject: [yarv-diff:216] r374 - trunk

Author: matz
Date: 2006-02-10 01:14:54 +0900 (Fri, 10 Feb 2006)
New Revision: 374

Modified:
   trunk/euc_jp.c
   trunk/oniguruma.h
   trunk/regcomp.c
   trunk/regenc.c
   trunk/regenc.h
   trunk/regerror.c
   trunk/regexec.c
   trunk/regint.h
   trunk/regparse.c
   trunk/regparse.h
   trunk/sjis.c
   trunk/utf8.c
Log:
* oniguruma.h: merge Oniguruma 4.0.0  [ruby-dev:28290]


Modified: trunk/euc_jp.c
===================================================================
--- trunk/euc_jp.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/euc_jp.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -31,7 +31,7 @@
 
 #define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
 
-static int EncLen_EUCJP[] = {
+static const int EncLen_EUCJP[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -158,20 +158,16 @@
 static int
 eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 static UChar*

Modified: trunk/oniguruma.h
===================================================================
--- trunk/oniguruma.h	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/oniguruma.h	2006-02-09 16:14:54 UTC (rev 374)
@@ -4,7 +4,7 @@
   oniguruma.h - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,8 +34,8 @@
 #endif
 
 #define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR   3
-#define ONIGURUMA_VERSION_MINOR   7
+#define ONIGURUMA_VERSION_MAJOR   4
+#define ONIGURUMA_VERSION_MINOR   0
 #define ONIGURUMA_VERSION_TEENY   0
 
 #ifdef __cplusplus
@@ -79,7 +79,11 @@
 
 /* PART: character encoding */
 
-typedef unsigned char  UChar;
+#ifndef ONIG_ESCAPE_UCHAR_COLLISION
+#define UChar OnigUChar
+#endif
+
+typedef unsigned char  OnigUChar;
 typedef unsigned long  OnigCodePoint;
 typedef unsigned int   OnigDistance;
 
@@ -149,24 +153,24 @@
 #else
 
 typedef struct {
-  int    (*mbc_enc_len)(const UChar* p);
+  int    (*mbc_enc_len)(const OnigUChar* p);
   const char*   name;
   int           max_enc_len;
   int           min_enc_len;
   OnigAmbigType support_ambig_flag;
   OnigMetaCharTableType meta_char_table;
-  int    (*is_mbc_newline)(const UChar* p, const UChar* end);
-  OnigCodePoint (*mbc_to_code)(const UChar* p, const UChar* end);
+  int    (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
+  OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
   int    (*code_to_mbclen)(OnigCodePoint code);
-  int    (*code_to_mbc)(OnigCodePoint code, UChar *buf);
-  int    (*mbc_to_normalize)(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* to);
-  int    (*is_mbc_ambiguous)(OnigAmbigType flag, const UChar** pp, const UChar* end);
-  int    (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
-  int    (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
+  int    (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
+  int    (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
+  int    (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
+  int    (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
+  int    (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
   int    (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
-  int    (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
-  UChar* (*left_adjust_char_head)(const UChar* start, const UChar* p);
-  int    (*is_allowed_reverse_match)(const UChar* p, const UChar* end);
+  int    (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
+  OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
+  int    (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
 } OnigEncodingType;
 
 typedef OnigEncodingType* OnigEncoding;
@@ -200,6 +204,7 @@
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
 ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
 
 #define ONIG_ENCODING_ASCII        (&OnigEncodingASCII)
 #define ONIG_ENCODING_ISO_8859_1   (&OnigEncodingISO_8859_1)
@@ -230,6 +235,7 @@
 #define ONIG_ENCODING_KOI8         (&OnigEncodingKOI8)
 #define ONIG_ENCODING_KOI8_R       (&OnigEncodingKOI8_R)
 #define ONIG_ENCODING_BIG5         (&OnigEncodingBIG5)
+#define ONIG_ENCODING_GB18030      (&OnigEncodingGB18030)
 
 #endif /* else RUBY && M17N */
 
@@ -333,22 +339,22 @@
 ONIG_EXTERN
 int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
 ONIG_EXTERN
-int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf));
 ONIG_EXTERN
-int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* buf));
+int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf));
 ONIG_EXTERN
-int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end));
+int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end));
 ONIG_EXTERN
-int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const UChar* end));
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
 
 #else  /* ONIG_RUBY_M17N */
 
 #define ONIGENC_NAME(enc)                      ((enc)->name)
 
 #define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
-  (enc)->mbc_to_normalize(flag,(const UChar** )pp,end,buf)
+  (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf)
 #define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
-  (enc)->is_mbc_ambiguous(flag,(const UChar** )pp,end)
+  (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end)
 #define ONIGENC_SUPPORT_AMBIG_FLAG(enc)        ((enc)->support_ambig_flag)
 #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
         (enc)->is_allowed_reverse_match(s,end)
@@ -405,7 +411,7 @@
         (enc)->get_ctype_code_range(ctype,sbr,mbr)
 
 ONIG_EXTERN
-UChar* onigenc_step_back P_((OnigEncoding enc, const UChar* start, const UChar* s, int n));
+OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
 
 #endif /* is not ONIG_RUBY_M17N */
 
@@ -418,21 +424,21 @@
 ONIG_EXTERN
 OnigEncoding onigenc_get_default_encoding P_(());
 ONIG_EXTERN
-void  onigenc_set_default_caseconv_table P_((const UChar* table));
+void  onigenc_set_default_caseconv_table P_((const OnigUChar* table));
 ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const UChar* start, const UChar* s, const UChar** prev));
+OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
 ONIG_EXTERN
-UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
 ONIG_EXTERN
-UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
 ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
 ONIG_EXTERN
-int onigenc_strlen P_((OnigEncoding enc, const UChar* p, const UChar* end));
+int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
 ONIG_EXTERN
-int onigenc_strlen_null P_((OnigEncoding enc, const UChar* p));
+int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
 ONIG_EXTERN
-int onigenc_str_bytelen_null P_((OnigEncoding enc, const UChar* p));
+int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
 
 
 
@@ -465,6 +471,7 @@
 #define ONIG_OPTION_NOTBOL               (ONIG_OPTION_CAPTURE_GROUP << 1)
 #define ONIG_OPTION_NOTEOL               (ONIG_OPTION_NOTBOL << 1)
 #define ONIG_OPTION_POSIX_REGION         (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT               ONIG_OPTION_POSIX_REGION  /* limit */
 
 #define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
 #define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
@@ -478,6 +485,7 @@
   OnigOptionType options;    /* default option */
 } OnigSyntaxType;
 
+ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
@@ -485,9 +493,11 @@
 ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
 
 /* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS               (&OnigSyntaxASIS)
 #define ONIG_SYNTAX_POSIX_BASIC        (&OnigSyntaxPosixBasic)
 #define ONIG_SYNTAX_POSIX_EXTENDED     (&OnigSyntaxPosixExtended)
 #define ONIG_SYNTAX_EMACS              (&OnigSyntaxEmacs)
@@ -495,6 +505,7 @@
 #define ONIG_SYNTAX_GNU_REGEX          (&OnigSyntaxGnuRegex)
 #define ONIG_SYNTAX_JAVA               (&OnigSyntaxJava)
 #define ONIG_SYNTAX_PERL               (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG            (&OnigSyntaxPerl_NG)
 #define ONIG_SYNTAX_RUBY               (&OnigSyntaxRuby)
 
 /* default syntax */
@@ -554,6 +565,7 @@
 #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17)  /* \p{^..}, \P{^..} */
 #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    (1<<18)  /* \p{IsXDigit} */
 #define ONIG_SYN_OP2_ESC_H_XDIGIT               (1<<19)  /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE         (1<<20)  /* \ */
 
 /* syntax (behavior) */
 #define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1<<31) /* not implemented */
@@ -695,8 +707,8 @@
 typedef struct re_registers   OnigRegion;
 
 typedef struct {
-  UChar* par;
-  UChar* par_end;
+  OnigUChar* par;
+  OnigUChar* par_end;
 } OnigErrorInfo;
 
 typedef struct {
@@ -704,8 +716,8 @@
   int upper;
 } OnigRepeatRange;
 
-typedef void (*OnigWarnFunc) P_((const char* s, ...));
-extern void onig_null_warn P_((const char* s, ...));
+typedef void (*OnigWarnFunc) P_((const char* s));
+extern void onig_null_warn P_((const char* s));
 #define ONIG_NULL_WARN       onig_null_warn
 
 #define ONIG_CHAR_TABLE_SIZE   256
@@ -776,25 +788,25 @@
 ONIG_EXTERN
 int onig_init P_((void));
 ONIG_EXTERN
-int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
+int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
 ONIG_EXTERN
 void onig_set_warn_func P_((OnigWarnFunc f));
 ONIG_EXTERN
 void onig_set_verb_warn_func P_((OnigWarnFunc f));
 ONIG_EXTERN
-int onig_new P_((regex_t**, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_new_deluxe P_((regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
 ONIG_EXTERN
 void onig_free P_((regex_t*));
 ONIG_EXTERN
-int onig_recompile P_((regex_t*, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_recompile_deluxe P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_search P_((regex_t*, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option));
+int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
 ONIG_EXTERN
-int onig_match P_((regex_t*, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option));
+int onig_match P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
 ONIG_EXTERN
 OnigRegion* onig_region_new P_((void));
 ONIG_EXTERN
@@ -810,11 +822,11 @@
 ONIG_EXTERN
 int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
 ONIG_EXTERN
-int onig_name_to_group_numbers P_((regex_t* reg, const UChar* name, const UChar* name_end, int** nums));
+int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
 ONIG_EXTERN
-int onig_name_to_backref_number P_((regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region));
+int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
 ONIG_EXTERN
-int onig_foreach_name P_((regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg));
+int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg));
 ONIG_EXTERN
 int onig_number_of_names P_((regex_t* reg));
 ONIG_EXTERN

Modified: trunk/regcomp.c
===================================================================
--- trunk/regcomp.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regcomp.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -34,7 +34,7 @@
    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
 
 extern OnigAmbigType
-onig_get_default_ambig_flag(void)
+onig_get_default_ambig_flag()
 {
   return OnigDefaultAmbigFlag;
 }
@@ -2120,29 +2120,6 @@
   return get_char_length_tree1(node, reg, len, 0);
 }
 
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
-  int found;
-
-  if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
-    if (IS_NULL(cc->mbuf)) {
-      found = 0;
-    }
-    else {
-      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
-    }
-  }
-  else {
-    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
-  }
-
-  if (IS_CCLASS_NOT(cc))
-    return !found;
-  else
-    return found;
-}
-
 /* x is not included y ==>  1 : 0 */
 static int
 is_not_included(Node* x, Node* y, regex_t* reg)
@@ -2516,6 +2493,9 @@
 
   case N_QUALIFIER:
     r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+    if (r == RECURSION_EXIST) {
+      if (NQUALIFIER(node).lower == 0) r = 0;
+    }
     break;
 
   case N_ANCHOR:
@@ -2943,15 +2923,55 @@
   return 0;
 }
 
+
 static int
+divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+                             UChar* prev_start, UChar* prev,
+                             UChar* end, Node*** tailp, Node** root)
+{
+  UChar *tmp, *wp;
+  Node* snode;
+
+  if (prev_ambig != 0) {
+    tmp = prev_start;
+    wp  = prev_start;
+    while (tmp < prev) {
+      wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+                                     &tmp, end, wp);
+    }
+    snode = onig_node_new_str(prev_start, wp);
+    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+    NSTRING_SET_AMBIG(snode);
+    if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+  }
+  else {
+    snode = onig_node_new_str(prev_start, prev);
+    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+  }
+
+  if (*tailp == (Node** )0) {
+    *root = onig_node_new_list(snode, NULL);
+    CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
+    *tailp = &(NCONS(*root).right);
+  }
+  else {
+    **tailp = onig_node_new_list(snode, NULL);
+    CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
+    *tailp = &(NCONS(**tailp).right);
+  }
+
+  return 0;
+}
+
+static int
 divide_ambig_string_node(Node* node, regex_t* reg)
 {
   StrNode* sn = &NSTRING(node);
   int ambig, prev_ambig;
   UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
-  Node *snode;
   Node *root = NULL_NODE;
   Node **tailp = (Node** )0;
+  int r;
 
   start = prev_start = p = sn->s;
   end  = sn->end;
@@ -2964,34 +2984,10 @@
     if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
                                               reg->ambig_flag, &p, end))) {
 
-      if (prev_ambig != 0) {
-        tmp = prev_start;
-        wp  = prev_start;
-        while (tmp < prev) {
-          wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                         &tmp, end, wp);
-        }
-        snode = onig_node_new_str(prev_start, wp);
-        CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-        NSTRING_SET_AMBIG(snode);
-        if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
-      }
-      else {
-        snode = onig_node_new_str(prev_start, prev);
-        CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-      }
+      r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+                                       end, &tailp, &root);
+      if (r != 0) return r;
 
-      if (tailp == (Node** )0) {
-        root = onig_node_new_list(snode, NULL);
-	CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
-	tailp = &(NCONS(root).right);
-      }
-      else {
-	*tailp = onig_node_new_list(snode, NULL);
-	CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
-	tailp = &(NCONS(*tailp).right);
-      }
-
       prev_ambig = ambig;
       prev_start = prev;
     }
@@ -3011,34 +3007,10 @@
     }
   }
   else {
-    if (prev_ambig != 0) {
-      tmp = prev_start;
-      wp  = prev_start;
-      while (tmp < end) {
-        wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                       &tmp, end, wp);
-      }
-      snode = onig_node_new_str(prev_start, wp);
-      CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-      NSTRING_SET_AMBIG(snode);
-      if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
-    }
-    else {
-      snode = onig_node_new_str(prev_start, end);
-      CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-    }
+    r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+                                     end, &tailp, &root);
+    if (r != 0) return r;
 
-    if (tailp == (Node** )0) {
-      root = onig_node_new_list(snode, NULL);
-      CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
-      tailp = &(NCONS(node).right);
-    }
-    else {
-      *tailp = onig_node_new_list(snode, NULL);
-      CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
-      tailp = &(NCONS(*tailp).right);
-    }
-
     swap_node(node, root);
     onig_node_str_clear(root); /* should be after swap! */
     onig_node_free(root);      /* free original string node */
@@ -3383,7 +3355,7 @@
 static int
 map_position_value(OnigEncoding enc, int i)
 {
-  static short int ByteValTable[] = {
+  static const short int ByteValTable[] = {
      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
@@ -3408,7 +3380,7 @@
 distance_value(MinMaxLen* mm)
 {
   /* 1000 / (min-max-dist + 1) */
-  static short int dist_vals[] = {
+  static const short int dist_vals[] = {
     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100, 
       91,   83,   77,   71,   67,   63,   59,   56,   53,   50, 
       48,   45,   43,   42,   40,   38,   37,   36,   34,   33, 
@@ -3711,7 +3683,7 @@
 static void
 clear_opt_map_info(OptMapInfo* map)
 {
-  static OptMapInfo clean_info = {
+  static const OptMapInfo clean_info = {
     {0, 0}, {0, 0}, 0,
     {
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -3758,8 +3730,8 @@
   int i, j, n, len;
   UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
   OnigCodePoint code, ccode;
-  OnigCompAmbigCodes* ccs;
-  OnigPairAmbigCodes* pccs;
+  const OnigCompAmbigCodes* ccs;
+  const OnigPairAmbigCodes* pccs;
   OnigAmbigType amb;
 
   add_char_opt_map_info(map, p[0], enc);
@@ -4316,10 +4288,7 @@
     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
     reg->exact_end = reg->exact + e->len;
  
-    if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
-      allow_reverse = 1;
-    else
-      allow_reverse =
+    allow_reverse =
 	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
 
     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
@@ -4514,8 +4483,8 @@
 static void
 print_optimize_info(FILE* f, regex_t* reg)
 {
-  static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
-			"EXACT_IC", "MAP" };
+  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+                              "EXACT_IC", "MAP" };
 
   fprintf(f, "optimize: %s\n", on[reg->optimize]);
   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
@@ -4959,7 +4928,7 @@
 }
 
 extern int
-onig_init(void)
+onig_init()
 {
   if (onig_inited != 0)
     return 0;
@@ -4981,9 +4950,9 @@
 
 
 extern int
-onig_end(void)
+onig_end()
 {
-  extern int onig_free_shared_cclass_table(void);
+  extern int onig_free_shared_cclass_table();
 
   THREAD_ATOMIC_START;
 

Modified: trunk/regenc.c
===================================================================
--- trunk/regenc.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regenc.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -32,13 +32,13 @@
 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
 
 extern int
-onigenc_init(void)
+onigenc_init()
 {
   return 0;
 }
 
 extern OnigEncoding
-onigenc_get_default_encoding(void)
+onigenc_get_default_encoding()
 {
   return OnigEncDefaultCharEncoding;
 }
@@ -175,7 +175,7 @@
 
 #define USE_APPLICATION_TO_LOWER_CASE_TABLE
 
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -251,7 +251,7 @@
 #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
 
 #ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncAsciiToUpperCaseTable[256] = {
+const UChar OnigEncAsciiToUpperCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -287,7 +287,7 @@
 };
 #endif
 
-unsigned short OnigEncAsciiCtypeTable[256] = {
+const unsigned short OnigEncAsciiCtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -323,7 +323,7 @@
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
 };
 
-UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -359,7 +359,7 @@
 };
 
 #ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -417,7 +417,7 @@
   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 }
 
-OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
   { 0x41, 0x61 },
   { 0x42, 0x62 },
   { 0x43, 0x63 },
@@ -475,7 +475,7 @@
 
 extern int
 onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                       OnigPairAmbigCodes** ccs)
+                                       const OnigPairAmbigCodes** ccs)
 {
   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
     *ccs = OnigAsciiPairAmbigCodes;
@@ -488,16 +488,16 @@
 
 extern int
 onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                         OnigCompAmbigCodes** ccs)
+                                         const OnigCompAmbigCodes** ccs)
 {
   return 0;
 }
 
 extern int
 onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                            OnigPairAmbigCodes** ccs)
+                                            const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xc0, 0xe0 },
     { 0xc1, 0xe1 },
     { 0xc2, 0xe2 },
@@ -577,9 +577,9 @@
 
 extern int
 onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                           OnigCompAmbigCodes** ccs)
+                                           const OnigCompAmbigCodes** ccs)
 {
-  static OnigCompAmbigCodes folds[] = {
+  static const OnigCompAmbigCodes folds[] = {
     { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
   };
 
@@ -593,7 +593,7 @@
 
 extern int
 onigenc_not_support_get_ctype_code_range(int ctype,
-                             OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+                             const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
 {
   return ONIG_NO_SUPPORT_CONFIG;
 }
@@ -830,10 +830,10 @@
   if ((code & 0xff000000) != 0) {
     *p++ = (UChar )((code >> 24) & 0xff);
   }
-  if ((code & 0xff0000) != 0) {
+  if ((code & 0xff0000) != 0 || p != buf) {
     *p++ = (UChar )((code >> 16) & 0xff);
   }
-  if ((code & 0xff00) != 0) {
+  if ((code & 0xff00) != 0 || p != buf) {
     *p++ = (UChar )((code >> 8) & 0xff);
   }
   *p++ = (UChar )(code & 0xff);
@@ -849,40 +849,32 @@
 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
 			  unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 extern int
 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
 			  unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 extern int

Modified: trunk/regenc.h
===================================================================
--- trunk/regenc.h	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regenc.h	2006-02-09 16:14:54 UTC (rev 374)
@@ -4,7 +4,7 @@
   regenc.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,15 +65,17 @@
 #else  /* ONIG_RUBY_M17N */
 
 #define USE_UNICODE_FULL_RANGE_CTYPE
+/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTF#18 */
 
 #define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII
 
 /* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
 
 /* methods for single byte encoding */
@@ -105,7 +107,7 @@
 
 /* in enc/unicode.c */
 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
 
 
 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
@@ -115,10 +117,10 @@
 #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
   ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
 
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
 
 #endif /* is not ONIG_RUBY_M17N */
 
@@ -133,7 +135,7 @@
 ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
 ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
 ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
 
 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]

Modified: trunk/regerror.c
===================================================================
--- trunk/regerror.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regerror.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -30,14 +30,20 @@
 #include "regint.h"
 #include <stdio.h> /* for vsnprintf() */
 
+#ifdef HAVE_STDARG_PROTOTYPES
 #include <stdarg.h>
+#define va_init_list(a,b) va_start(a,b)
+#else
+#include <varargs.h>
+#define va_init_list(a,b) va_start(a)
+#endif
 
-extern char*
+extern UChar*
 onig_error_code_to_format(int code)
 {
   char *p;
 
-  if (code >= 0) return (char* )0;
+  if (code >= 0) return (UChar* )0;
 
   switch (code) {
   case ONIG_MISMATCH:
@@ -171,7 +177,7 @@
     p = "undefined error code"; break;
   }
 
-  return p;
+  return (UChar* )p;
 }
 
 
@@ -179,14 +185,21 @@
 #define MAX_ERROR_PAR_LEN   30
 
 extern int
+#ifdef HAVE_STDARG_PROTOTYPES
 onig_error_code_to_str(UChar* s, int code, ...)
+#else
+onig_error_code_to_str(s, code, va_alist)
+  UChar* s;
+  int code;
+  va_dcl 
+#endif
 {
   UChar *p, *q;
   OnigErrorInfo* einfo;
   int len;
   va_list vargs;
 
-  va_start(vargs, code);
+  va_init_list(vargs, code);
 
   switch (code) {
   case ONIGERR_UNDEFINED_NAME_REFERENCE:
@@ -242,26 +255,37 @@
 
 
 void
-onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
-			    char* pat, char* pat_end, char *fmt, ...)
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+                           UChar* pat, UChar* pat_end, const UChar *fmt, ...)
+#else
+onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
+    UChar buf[];
+    int bufsize;
+    OnigEncoding enc;
+    UChar* pat;
+    UChar* pat_end;
+    const UChar *fmt;
+    va_dcl
+#endif
 {
   int n, need, len;
   UChar *p, *s, *bp;
-  char bs[6];
+  UChar bs[6];
   va_list args;
 
-  va_start(args, fmt);
-  n = vsnprintf(buf, bufsize, fmt, args);
+  va_init_list(args, (char* )fmt);
+  n = vsnprintf((char* )buf, bufsize, (char* )fmt, args);
   va_end(args);
 
   need = (pat_end - pat) * 4 + 4;
 
   if (n + need < bufsize) {
-    strcat(buf, ": /");
+    strcat((char* )buf, ": /");
     s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
 
     p = pat;
-    while (p < (UChar* )pat_end) {
+    while (p < pat_end) {
       if (*p == MC_ESC(enc)) {
 	*s++ = *p++;
 	len = enc_len(enc, p);
@@ -280,7 +304,7 @@
           int blen;
 
           while (len-- > 0) {
-            sprintf(bs, "\\%03o", *p++ & 0377);
+            sprintf((char* )bs, "\\%03o", *p++ & 0377);
             blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
             bp = bs;
             while (blen-- > 0) *s++ = *bp++;
@@ -289,7 +313,7 @@
       }
       else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
 	       !ONIGENC_IS_CODE_SPACE(enc, *p)) {
-	sprintf(bs, "\\%03o", *p++ & 0377);
+	sprintf((char* )bs, "\\%03o", *p++ & 0377);
 	len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
         bp = bs;
 	while (len-- > 0) *s++ = *bp++;

Modified: trunk/regexec.c
===================================================================
--- trunk/regexec.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regexec.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -29,6 +29,12 @@
 
 #include "regint.h"
 
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+   ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+#endif
+
 #ifdef USE_CAPTURE_HISTORY
 static void history_tree_free(OnigCaptureTreeNode* node);
 
@@ -227,7 +233,7 @@
 }
 
 extern OnigRegion*
-onig_region_new(void)
+onig_region_new()
 {
   OnigRegion* r;
 
@@ -1165,27 +1171,43 @@
 }
 
 static int
-code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
 {
-  unsigned int in_cc;
-  CClassNode* cc = (CClassNode* )node;
+  int found;
 
-  if (enclen == 1 && code < SINGLE_BYTE_SIZE) {
-    in_cc = BITSET_AT(cc->bs, code);
+  if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+    if (IS_NULL(cc->mbuf)) {
+      found = 0;
+    }
+    else {
+      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+    }
   }
   else {
-    UChar* p = ((BBuf* )(cc->mbuf))->p;
-    in_cc = onig_is_in_code_range(p, code);
+    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
   }
 
-  if (IS_CCLASS_NOT(cc)) {
-    return (in_cc ? 0 : 1);
+  if (IS_CCLASS_NOT(cc))
+    return !found;
+  else
+    return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int len;
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    len = 2;
   }
   else {
-    return (in_cc ? 1 : 0);
+    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
   }
+  return is_code_in_cc(len, code, cc);
 }
 
+
 /* matching region of POSIX API */
 typedef int regoff_t;
 
@@ -1739,8 +1761,9 @@
 	mb_len = enc_len(encode, s);
 	ss = s;
 	s += mb_len;
+	DATA_ENSURE(0);
 	code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-        if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
+	if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
       }
       STAT_OP_OUT;
       break;
@@ -1946,6 +1969,12 @@
 	STAT_OP_OUT;
 	continue;
       }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+      else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+	STAT_OP_OUT;
+	continue;
+      }
+#endif
       goto fail;
       break;
 
@@ -1966,6 +1995,15 @@
 	STAT_OP_OUT;
 	continue;
       }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+      else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+        UChar* ss = s + enc_len(encode, s);
+        if (ON_STR_END(ss + enc_len(encode, ss))) {
+          STAT_OP_OUT;
+          continue;
+        }
+      }
+#endif
       goto fail;
       break;
 
@@ -3029,7 +3067,11 @@
 	  if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
 	    goto retry_gate;
 	}
-	else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
+	else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                )
 	  goto retry_gate;
 	break;
       }
@@ -3149,7 +3191,11 @@
 	    goto retry;
 	  }
 	}
-	else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
+	else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                ) {
 	  p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
 	  if (IS_NULL(p)) goto fail;
 	  goto retry;
@@ -3310,7 +3356,7 @@
     }
   }
   else if (str == end) { /* empty string */
-    static const UChar* address_for_empty_string = "";
+    static const UChar* address_for_empty_string = (UChar* )"";
 
 #ifdef ONIG_DEBUG_SEARCH
     fprintf(stderr, "onig_search: empty string.\n");
@@ -3354,8 +3400,11 @@
 	  if (sch_range > end) sch_range = (UChar* )end;
 	}
       }
-      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
-	  (end - start) >= reg->threshold_len) {
+
+      if ((end - start) < reg->threshold_len)
+        goto mismatch;
+
+      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
 	do {
 	  if (! forward_search_range(reg, str, end, s, sch_range,
 				     &low, &high, &low_prev)) goto mismatch;
@@ -3368,22 +3417,26 @@
 	    prev = s;
 	    s += enc_len(reg->enc, s);
 	  }
-	  if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
-	    if (IS_NOT_NULL(prev)) {
-	      while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
-                     s < range) {
-		prev = s;
-		s += enc_len(reg->enc, s);
-	      }
-	    }
-	  }
 	} while (s < range);
 	goto mismatch;
       }
       else { /* check only. */
-	if ((end - start) < reg->threshold_len ||
-	    ! forward_search_range(reg, str, end, s, sch_range,
+	if (! forward_search_range(reg, str, end, s, sch_range,
 				   &low, &high, (UChar** )NULL)) goto mismatch;
+
+        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+          do {
+            MATCH_AND_RETURN_CHECK;
+            prev = s;
+            s += enc_len(reg->enc, s);
+
+            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
+              prev = s;
+              s += enc_len(reg->enc, s);
+            }
+          } while (s < range);
+          goto mismatch;
+        }
       }
     }
 
@@ -3391,7 +3444,11 @@
       MATCH_AND_RETURN_CHECK;
       prev = s;
       s += enc_len(reg->enc, s);
-    } while (s <= range);   /* exec s == range, because empty match with /$/. */
+    } while (s < range);
+
+    if (s == range) { /* because empty match with /$/. */
+      MATCH_AND_RETURN_CHECK;
+    }
   }
   else {  /* backward search */
     if (reg->optimize != ONIG_OPTIMIZE_NONE) {

Modified: trunk/regint.h
===================================================================
--- trunk/regint.h	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regint.h	2006-02-09 16:14:54 UTC (rev 374)
@@ -62,6 +62,11 @@
 #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */
 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* treat \r\n as a line terminator.
+   !!! NOT SUPPORTED !!!
+   use this configuration at your own risk */
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+
 /* internal config */
 #define USE_RECYCLE_NODE
 #define USE_OP_PUSH_OR_JUMP_EXACT
@@ -105,8 +110,8 @@
   }\
 } while (0)
 
-#define DEFAULT_WARN_FUNCTION        rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION   rb_warning
+#define DEFAULT_WARN_FUNCTION        onig_rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION   onig_rb_warning
 
 #endif /* else NOT_RUBY */
 
@@ -721,6 +726,11 @@
 #define MC_ONE_OR_MORE_TIME(enc)  (enc)->meta_char_table.one_or_more_time
 #define MC_ANYCHAR_ANYTIME(enc)   (enc)->meta_char_table.anychar_anytime
 
+#define IS_MC_ESC_CODE(code, enc, syn) \
+  ((code) == MC_ESC(enc) && \
+   !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
 #define SYN_POSIX_COMMON_OP \
  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
    ONIG_SYN_OP_DECIMAL_BACKREF | \
@@ -781,13 +791,14 @@
 #endif
 #endif
 
-extern char* onig_error_code_to_format P_((int code));
-extern void  onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_error_code_to_format P_((int code));
+extern void  onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
 extern int  onig_bbuf_init P_((BBuf* buf, int size));
 extern int  onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
 extern int  onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
 extern void onig_chain_reduce P_((regex_t* reg));
 extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
 extern void onig_transfer P_((regex_t* to, regex_t* from));
+extern int  onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
 
 #endif /* REGINT_H */

Modified: trunk/regparse.c
===================================================================
--- trunk/regparse.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regparse.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -58,8 +58,22 @@
 
 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
 
-extern void onig_null_warn(const char* s, ...) { }
+extern void onig_null_warn(const char* s) { }
 
+#ifdef RUBY_PLATFORM
+extern void
+onig_rb_warn(const char* s)
+{
+  rb_warn(s);
+}
+
+extern void
+onig_rb_warning(const char* s)
+{
+  rb_warning(s);
+}
+#endif
+
 #ifdef DEFAULT_WARN_FUNCTION
 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
 #else
@@ -1050,12 +1064,12 @@
 
 #ifdef USE_RECYCLE_NODE
 extern int
-onig_free_node_list(void)
+onig_free_node_list()
 {
   FreeNode* n;
 
   THREAD_ATOMIC_START;
-  while (FreeNodeList) {
+  while (IS_NOT_NULL(FreeNodeList)) {
     n = FreeNodeList;
     FreeNodeList = FreeNodeList->next;
     xfree(n);
@@ -1066,18 +1080,19 @@
 #endif
 
 static Node*
-node_new(void)
+node_new()
 {
   Node* node;
 
 #ifdef USE_RECYCLE_NODE
+  THREAD_ATOMIC_START;
   if (IS_NOT_NULL(FreeNodeList)) {
-    THREAD_ATOMIC_START;
     node = (Node* )FreeNodeList;
     FreeNodeList = FreeNodeList->next;
     THREAD_ATOMIC_END;
     return node;
   }
+  THREAD_ATOMIC_END;
 #endif
 
   node = (Node* )xmalloc(sizeof(Node));
@@ -1094,7 +1109,7 @@
 }
 
 static Node*
-node_new_cclass(void)
+node_new_cclass()
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
@@ -1106,7 +1121,7 @@
 
 static Node*
 node_new_cclass_by_codepoint_range(int not,
-                   OnigCodePoint sbr[], OnigCodePoint mbr[])
+                   const OnigCodePoint sbr[], const OnigCodePoint mbr[])
 {
   CClassNode* cc;
   int n, i, j;
@@ -1163,7 +1178,7 @@
 }
 
 static Node*
-node_new_anychar(void)
+node_new_anychar()
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
@@ -1434,7 +1449,7 @@
 }
 
 static Node*
-node_new_empty(void)
+node_new_empty()
 {
   return node_new_str(NULL, NULL);
 }
@@ -2358,15 +2373,17 @@
     control:
       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
       PFETCH(c);
-      if (c == MC_ESC(enc)) {
-	v = fetch_escaped_value(&p, end, env);
-	if (v < 0) return v;
-        c = (OnigCodePoint )(v & 0x9f);
+      if (c == '?') {
+	c = 0177;
       }
-      else if (c == '?')
-	c = 0177;
-      else
+      else {
+        if (c == MC_ESC(enc)) {
+          v = fetch_escaped_value(&p, end, env);
+          if (v < 0) return v;
+          c = (OnigCodePoint )v;
+        }
 	c &= 0x9f;
+      }
       break;
     }
     /* fall through */
@@ -2512,11 +2529,11 @@
 
   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
-    char buf[WARN_BUFSIZE];
+    UChar buf[WARN_BUFSIZE];
     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
 		env->pattern, env->pattern_end,
-		"character class has '%s' without escape", c);
-    (*onig_warn)(buf);
+                (UChar* )"character class has '%s' without escape", c);
+    (*onig_warn)((char* )buf);
   }
 }
 
@@ -2526,11 +2543,11 @@
   if (onig_warn == onig_null_warn) return ;
 
   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
-    char buf[WARN_BUFSIZE];
+    UChar buf[WARN_BUFSIZE];
     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
 		(env)->pattern, (env)->pattern_end,
-		"regular expression has '%s' without escape", c);
-    (*onig_warn)(buf);
+		(UChar* )"regular expression has '%s' without escape", c);
+    (*onig_warn)((char* )buf);
   }
 }
 
@@ -2621,6 +2638,8 @@
   tok->type = TK_CHAR;
   tok->base = 0;
   tok->u.c  = c;
+  tok->escaped = 0;
+
   if (c == ']') {
     tok->type = TK_CC_CLOSE;
   }
@@ -2792,7 +2811,7 @@
 	tok->type = TK_CC_CC_OPEN;
       }
       else {
-	CC_ESC_WARN(env, "[");
+	CC_ESC_WARN(env, (UChar* )"[");
       }
     }
   }
@@ -2831,7 +2850,7 @@
   tok->backp = p;
 
   PFETCH(c);
-  if (c == MC_ESC(enc)) {
+  if (IS_MC_ESC_CODE(c, enc, syn)) {
     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
 
     tok->backp = p;
@@ -3363,7 +3382,7 @@
 
     case ']':
       if (*src > env->pattern)   /* /].../ is allowed. */
-	CCEND_ESC_WARN(env, "]");
+	CCEND_ESC_WARN(env, (UChar* )"]");
       break;
 
     case '#':
@@ -3398,7 +3417,7 @@
 
 static int
 add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
-                         OnigCodePoint sbr[], OnigCodePoint mbr[])
+                         const OnigCodePoint sbr[], const OnigCodePoint mbr[])
 {
   int i, r;
   OnigCodePoint j;
@@ -3462,7 +3481,7 @@
 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
 {
   int c, r;
-  OnigCodePoint *sbr, *mbr;
+  const OnigCodePoint *sbr, *mbr;
   OnigEncoding enc = env->enc;
 
   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
@@ -3600,19 +3619,19 @@
 #define POSIX_BRACKET_NAME_MAX_LEN         6
 
   static PosixBracketEntryType PBS[] = {
-    { "alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { "alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { "blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { "cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { "digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { "graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { "lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { "print",  ONIGENC_CTYPE_PRINT,  5 },
-    { "punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { "space",  ONIGENC_CTYPE_SPACE,  5 },
-    { "upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { "ascii",  ONIGENC_CTYPE_ASCII,  5 }, /* I don't know origin. Perl? */
+    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
     { (UChar* )NULL, -1, 0 }
   };
 
@@ -3636,7 +3655,7 @@
   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
       p = (UChar* )onigenc_step(enc, p, end, pb->len);
-      if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
+      if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
 	return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
 
       r = add_ctype_to_cc(cc, pb->ctype, not, env);
@@ -3671,19 +3690,19 @@
 property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
 {
   static PosixBracketEntryType PBS[] = {
-    { "Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { "Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { "Blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { "Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { "Digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { "Graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { "Lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { "Print",  ONIGENC_CTYPE_PRINT,  5 },
-    { "Punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { "Space",  ONIGENC_CTYPE_SPACE,  5 },
-    { "Upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { "ASCII",  ONIGENC_CTYPE_ASCII,  5 },
+    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
     { (UChar* )NULL, -1, 0 }
   };
 
@@ -3933,7 +3952,7 @@
                            *src, env->pattern_end, 1, env->enc))
       return ONIGERR_EMPTY_CHAR_CLASS;
 
-    CC_ESC_WARN(env, "]");
+    CC_ESC_WARN(env, (UChar* )"]");
     r = tok->type = TK_CHAR;  /* allow []...] */
   }
 
@@ -4036,7 +4055,7 @@
       r = parse_posix_bracket(cc, &p, end, env);
       if (r < 0) goto err;
       if (r == 1) {  /* is not POSIX bracket */
-	CC_ESC_WARN(env, "[");
+	CC_ESC_WARN(env, (UChar* )"[");
 	p = tok->backp;
 	v = (OnigCodePoint )tok->u.c;
 	in_israw = 0;
@@ -4082,7 +4101,7 @@
 	  goto val_entry;
 	}
 	else if (r == TK_CC_AND) {
-	  CC_ESC_WARN(env, "-");
+	  CC_ESC_WARN(env, (UChar* )"-");
 	  goto range_end_val;
 	}
 	state = CCS_RANGE;
@@ -4097,12 +4116,12 @@
 	fetched = 1;
 	/* [--x] or [a&&-x] is warned. */
 	if (r == TK_CC_RANGE || and_start != 0)
-	  CC_ESC_WARN(env, "-");
+	  CC_ESC_WARN(env, (UChar* )"-");
 
 	goto val_entry;
       }
       else if (state == CCS_RANGE) {
-	CC_ESC_WARN(env, "-");
+	CC_ESC_WARN(env, (UChar* )"-");
 	goto sb_char;  /* [!--x] is allowed */
       }
       else { /* CCS_COMPLETE */
@@ -4111,12 +4130,12 @@
 	fetched = 1;
 	if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
 	else if (r == TK_CC_AND) {
-	  CC_ESC_WARN(env, "-");
+	  CC_ESC_WARN(env, (UChar* )"-");
 	  goto range_end_val;
 	}
 	
 	if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
-	  CC_ESC_WARN(env, "-");
+	  CC_ESC_WARN(env, (UChar* )"-");
 	  goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
 	}
 	r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
@@ -4493,7 +4512,7 @@
       if (qn->by_number == 0 && qnt->by_number == 0 &&
 	  IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
         int nestq_num, targetq_num;
-        char buf[WARN_BUFSIZE];
+        UChar buf[WARN_BUFSIZE];
 
         nestq_num   = popular_qualifier_num(qn);
         targetq_num = popular_qualifier_num(qnt);
@@ -4505,9 +4524,9 @@
         case RQ_DEL:
           if (onig_verb_warn != onig_null_warn) {
             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
-                                       env->pattern, env->pattern_end,
-                                       "redundant nested repeat operator");
-            (*onig_verb_warn)(buf);
+                                 env->pattern, env->pattern_end,
+                                 (UChar* )"redundant nested repeat operator");
+            (*onig_verb_warn)((char* )buf);
           }
           goto warn_exit;
           break;
@@ -4516,10 +4535,10 @@
           if (onig_verb_warn != onig_null_warn) {
             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
                                        env->pattern, env->pattern_end,
-            "nested repeat operator %s and %s was replaced with '%s'",
+            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
             PopularQStr[targetq_num], PopularQStr[nestq_num],
             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
-            (*onig_verb_warn)(buf);
+            (*onig_verb_warn)((char* )buf);
           }
           goto warn_exit;
           break;
@@ -4551,8 +4570,8 @@
   int r, i, j, k, clen, len, ncode, n;
   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
   Node **ptail, *snode = NULL_NODE;
-  OnigCompAmbigCodes* ccs;
-  OnigCompAmbigCodeItem* ci;
+  const OnigCompAmbigCodes* ccs;
+  const OnigCompAmbigCodeItem* ci;
   OnigAmbigType amb;
 
   n = 0;
@@ -4660,7 +4679,7 @@
 }
 
 extern int
-onig_free_shared_cclass_table(void)
+onig_free_shared_cclass_table()
 {
   if (IS_NOT_NULL(OnigTypeCClassTable)) {
     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
@@ -4817,7 +4836,7 @@
 	  int ctype, not;
 
 #ifdef USE_SHARED_CCLASS_TABLE
-          OnigCodePoint *sbr, *mbr;
+          const OnigCodePoint *sbr, *mbr;
 
 	  ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
@@ -4899,7 +4918,7 @@
 
       if (IS_IGNORECASE(env->option)) {
         int i, n, in_cc;
-        OnigPairAmbigCodes* ccs;
+        const OnigPairAmbigCodes* ccs;
         BitSetRef bs = cc->bs;
         OnigAmbigType amb;
 

Modified: trunk/regparse.h
===================================================================
--- trunk/regparse.h	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/regparse.h	2006-02-09 16:14:54 UTC (rev 374)
@@ -290,7 +290,6 @@
 extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
 #endif
 
-extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
 extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
 extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
 extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
@@ -303,7 +302,7 @@
 extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
 extern Node*  onig_node_new_list P_((Node* left, Node* right));
 extern void   onig_node_str_clear P_((Node* node));
-extern int    onig_free_node_list(void);
+extern int    onig_free_node_list();
 extern int    onig_names_free P_((regex_t* reg));
 extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
 

Modified: trunk/sjis.c
===================================================================
--- trunk/sjis.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/sjis.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static int EncLen_SJIS[] = {
+static const int EncLen_SJIS[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -167,21 +167,16 @@
 static int
 sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else {
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
     }
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 static UChar*

Modified: trunk/utf8.c
===================================================================
--- trunk/utf8.c	2006-02-09 16:12:41 UTC (rev 373)
+++ trunk/utf8.c	2006-02-09 16:14:54 UTC (rev 374)
@@ -2,7 +2,7 @@
   utf8.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
 
 #define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)
 
-static int EncLen_UTF8[] = {
+static const int EncLen_UTF8[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@
   return EncLen_UTF8[*p];
 }
 
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p < end) {
+    if (*p == 0x0a) return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if (*p == 0x0d) return 1;
+    if (p + 1 < end) {
+      if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
+	return 1;
+      if (p + 2 < end) {
+	if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
+	    && *(p+1) == 0x80 && *p == 0xe2)  /* U+2028, U+2029 */
+	  return 1;
+      }
+    }
+#endif
+  }
+
+  return 0;
+}
+
 static OnigCodePoint
 utf8_mbc_to_code(const UChar* p, const UChar* end)
 {
@@ -307,16 +330,16 @@
 }
 
 
-static OnigCodePoint EmptyRange[] = { 0 };
+static const OnigCodePoint EmptyRange[] = { 0 };
 
-static OnigCodePoint SBAlnum[] = {
+static const OnigCodePoint SBAlnum[] = {
   3,
   0x0030, 0x0039,
   0x0041, 0x005a,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBAlnum[] = {
+static const OnigCodePoint MBAlnum[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   411,
 #else
@@ -738,13 +761,13 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBAlnum */
 
-static OnigCodePoint SBAlpha[] = {
+static const OnigCodePoint SBAlpha[] = {
   2,
   0x0041, 0x005a,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBAlpha[] = {
+static const OnigCodePoint MBAlpha[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   394,
 #else
@@ -1149,13 +1172,13 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBAlpha */
 
-static OnigCodePoint SBBlank[] = {
+static const OnigCodePoint SBBlank[] = {
   2,
   0x0009, 0x0009,
   0x0020, 0x0020
 };
 
-static OnigCodePoint MBBlank[] = {
+static const OnigCodePoint MBBlank[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   7,
 #else
@@ -1173,13 +1196,13 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBBlank */
 
-static OnigCodePoint SBCntrl[] = {
+static const OnigCodePoint SBCntrl[] = {
   2,
   0x0000, 0x001f,
   0x007f, 0x007f
 };
 
-static OnigCodePoint MBCntrl[] = {
+static const OnigCodePoint MBCntrl[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   18,
 #else
@@ -1208,12 +1231,12 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBCntrl */
 
-static OnigCodePoint SBDigit[] = {
+static const OnigCodePoint SBDigit[] = {
   1,
   0x0030, 0x0039
 };
 
-static OnigCodePoint MBDigit[] = {
+static const OnigCodePoint MBDigit[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   22,
 #else
@@ -1245,12 +1268,12 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBDigit */
 
-static OnigCodePoint SBGraph[] = {
+static const OnigCodePoint SBGraph[] = {
   1,
   0x0021, 0x007e
 };
 
-static OnigCodePoint MBGraph[] = {
+static const OnigCodePoint MBGraph[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   404,
 #else
@@ -1665,12 +1688,12 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBGraph */
 
-static OnigCodePoint SBLower[] = {
+static const OnigCodePoint SBLower[] = {
   1,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBLower[] = {
+static const OnigCodePoint MBLower[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   423,
 #else
@@ -2104,13 +2127,13 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBLower */
 
-static OnigCodePoint SBPrint[] = {
+static const OnigCodePoint SBPrint[] = {
   2,
   0x0009, 0x000d,
   0x0020, 0x007e
 };
 
-static OnigCodePoint MBPrint[] = {
+static const OnigCodePoint MBPrint[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   403,
 #else
@@ -2524,7 +2547,7 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBPrint */
 
-static OnigCodePoint SBPunct[] = {
+static const OnigCodePoint SBPunct[] = {
   9,
   0x0021, 0x0023,
   0x0025, 0x002a,
@@ -2537,7 +2560,7 @@
   0x007d, 0x007d
 }; /* end of SBPunct */
 
-static OnigCodePoint MBPunct[] = {
+static const OnigCodePoint MBPunct[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   77,
 #else
@@ -2625,13 +2648,13 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBPunct */
 
-static OnigCodePoint SBSpace[] = {
+static const OnigCodePoint SBSpace[] = {
   2,
   0x0009, 0x000d,
   0x0020, 0x0020
 };
 
-static OnigCodePoint MBSpace[] = {
+static const OnigCodePoint MBSpace[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   9,
 #else
@@ -2651,12 +2674,12 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBSpace */
 
-static OnigCodePoint SBUpper[] = {
+static const OnigCodePoint SBUpper[] = {
   1,
   0x0041, 0x005a
 };
 
-static OnigCodePoint MBUpper[] = {
+static const OnigCodePoint MBUpper[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   420,
 #else
@@ -3087,19 +3110,19 @@
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBUpper */
 
-static OnigCodePoint SBXDigit[] = {
+static const OnigCodePoint SBXDigit[] = {
   3,
   0x0030, 0x0039,
   0x0041, 0x0046,
   0x0061, 0x0066
 };
 
-static OnigCodePoint SBASCII[] = {
+static const OnigCodePoint SBASCII[] = {
   1,
   0x0000, 0x007f
 };
 
-static OnigCodePoint SBWord[] = {
+static const OnigCodePoint SBWord[] = {
   4,
   0x0030, 0x0039,
   0x0041, 0x005a,
@@ -3107,7 +3130,7 @@
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBWord[] = {
+static const OnigCodePoint MBWord[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   432,
 #else
@@ -3554,7 +3577,7 @@
 
 static int
 utf8_get_ctype_code_range(int ctype,
-                          OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+                          const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
 {
 #define CR_SET(sbl,mbl) do { \
   *sbr = sbl; \
@@ -3622,7 +3645,7 @@
 utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  OnigCodePoint *range;
+  const OnigCodePoint *range;
 #endif
 
   if (code < 256) {
@@ -3674,6 +3697,9 @@
   case ONIGENC_CTYPE_ALNUM:
     range = MBAlnum;
     break;
+  case ONIGENC_CTYPE_NEWLINE:
+    return FALSE;
+    break;
 
   default:
     return ONIGENCERR_TYPE_BUG;
@@ -3723,7 +3749,7 @@
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
   },
-  onigenc_is_mbc_newline_0x0a,
+  utf8_is_mbc_newline,
   utf8_mbc_to_code,
   utf8_code_to_mbclen,
   utf8_code_to_mbc,


-- 
ML: yarv-diff quickml.atdot.net
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧][生データ]