日本語識別子

GCCで日本語の識別子を使いたいと思い、GCCのソースを適当にいじる。

--- libcpp/charset.c.orig
+++ libcpp/charset.c
@@ -80,5 +80,5 @@ Foundation, 51 Franklin Street, Fifth Fl
 
 #if HOST_CHARSET == HOST_CHARSET_ASCII
-#define SOURCE_CHARSET "UTF-8"
+#define SOURCE_CHARSET "C99"
 #define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
@@ -692,13 +692,13 @@ cpp_init_iconv (cpp_reader *pfile)
     /* This effectively means that wide strings are not supported,
        so don't do any conversion at all.  */
-   default_wcset = SOURCE_CHARSET;
+   default_wcset = "UTF-8";
 
   if (!ncset)
-    ncset = SOURCE_CHARSET;
+    ncset = "UTF-8";
   if (!wcset)
     wcset = default_wcset;
 
-  pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
-  pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET);
+  pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, "UTF-8");
+  pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, "UTF-8");
 }
 
--- libcpp/init.c.orig
+++ libcpp/init.c
@@ -81,11 +81,11 @@ struct lang_flags
 static const struct lang_flags lang_defaults[] =
 { /*              c99 c++ xnum xid std  //   digr  */
-  /* GNUC89 */  { 0,  0,  1,   0,  0,   1,   1     },
-  /* GNUC99 */  { 1,  0,  1,   0,  0,   1,   1     },
+  /* GNUC89 */  { 0,  0,  1,   1,  0,   1,   1     },
+  /* GNUC99 */  { 1,  0,  1,   1,  0,   1,   1     },
   /* STDC89 */  { 0,  0,  0,   0,  1,   0,   0     },
   /* STDC94 */  { 0,  0,  0,   0,  1,   0,   1     },
-  /* STDC99 */  { 1,  0,  1,   0,  1,   1,   1     },
-  /* GNUCXX */  { 0,  1,  1,   0,  0,   1,   1     },
-  /* CXX98  */  { 0,  1,  1,   0,  1,   1,   1     },
+  /* STDC99 */  { 1,  0,  1,   1,  1,   1,   1     },
+  /* GNUCXX */  { 0,  1,  1,   1,  0,   1,   1     },
+  /* CXX98  */  { 0,  1,  1,   1,  1,   1,   1     },
   /* ASM    */  { 0,  0,  1,   0,  0,   1,   0     }
   /* xid should be 1 for GNUC99, STDC99, GNUCXX and CXX98 when no
--- libcpp/macro.c.orig
+++ libcpp/macro.c
@@ -327,11 +327,13 @@ cpp_quote_string (uchar *dest, const uch
       uchar c = *src++;
 
-      if (c == '\\' || c == '"')
-	{
-	  *dest++ = '\\';
-	  *dest++ = c;
-	}
-      else
-	  *dest++ = c;
+      if((c == '\\' && !(toupper(*src) == 'U')) || c == '"')
+        *dest++ = '\\';
+      if (c == '\\' && *src == '\\')
+        {
+          src++; len--;
+          *dest++ = '\\';
+          *dest++ = '\\';
+        }
+      *dest++ = c;
     }
 

これでなんかうまくいくっぽい。これでいいのかな?

テストに使ったコード

#include <stdio.h>
/* Stringify the macro argument using the preprocessor's # operator. */
#define 文字列化(マクロ引数) #マクロ引数

/* Identity function: returns its argument unchanged.  */
int
はげ (int 引数)
{
  return 引数;
}

/* Test driver: uses Japanese identifiers for a local variable, a function
   call and a stringified macro argument, then prints both results.
   Returns 0.  */
int main(void)
{
  int 日本語 = 3;

  /* 文字列化 applies the # operator to its argument, so %s receives the
     spelling of the identifier while %d receives the value that はげ
     returns for it.  */
  printf("テスト:%s = %d\n", 文字列化(日本語), はげ(日本語));
  return 0;
}

バグを見つけた人はコメント欄に書いてね(直さないけど)。